diff options
author | Ke Wen <kwen@nvidia.com> | 2019-07-16 18:41:56 +0300 |
---|---|---|
committer | Ke Wen <kwen@nvidia.com> | 2019-07-17 16:32:33 +0300 |
commit | 920ae57c147de7a53a9d47d2171be7c10d5e845c (patch) | |
tree | 41c83ecc0dea68fedbd1c71ee78f83b289d4b25b | |
parent | c8c68fb5f79d28555bcc65c423e4b250fca85bbf (diff) |
Fix #224: prevent number of IB devices from going out of bounds
-rw-r--r-- | src/transport/net_ib.cc | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc index 43d8e35..c8eb6d5 100644 --- a/src/transport/net_ib.cc +++ b/src/transport/net_ib.cc @@ -112,13 +112,13 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) { if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError; - for (int d=0; d<nIbDevs; d++) { + for (int d=0; d<nIbDevs && ncclNIbDevs<MAX_IB_DEVS; d++) { struct ibv_context * context; if (ncclSuccess != wrap_ibv_open_device(&context, devices[d]) || context == NULL) { WARN("NET/IB : Unable to open device %s", devices[d]->name); continue; } - int found = 0; + int nPorts = 0; struct ibv_device_attr devAttr; memset(&devAttr, 0, sizeof(devAttr)); if (ncclSuccess != wrap_ibv_query_device(context, &devAttr)) { @@ -148,10 +148,10 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) { ncclIbDevs[ncclNIbDevs].context = context; strncpy(ncclIbDevs[ncclNIbDevs].devName, devices[d]->name, MAXNAMESIZE); ncclNIbDevs++; - found++; + nPorts++; pthread_create(&ncclIbAsyncThread, NULL, ncclIbAsyncThreadMain, context); } - if (found == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; } + if (nPorts == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; } } if (nIbDevs && (ncclSuccess != wrap_ibv_free_device_list(devices))) { return ncclInternalError; }; } |