diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2020-06-24 00:08:49 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2020-06-27 02:35:54 +0300 |
commit | 195232556936b39b01cc908296e1650b80d4a3e9 (patch) | |
tree | 9cf02a3ffeb275e5a0aae0f487298f2d63bad885 | |
parent | 01afd20a77b5804e0ecf1042509dd9d20ebf9e93 (diff) |
2.7.6-1
Fix crash when NVSwitch is not visible inside a VM.
-rw-r--r-- | makefiles/version.mk | 2 | ||||
-rw-r--r-- | src/graph/xml.cc | 11 |
2 files changed, 9 insertions, 4 deletions
diff --git a/makefiles/version.mk b/makefiles/version.mk
index 6f3b266..6a1deca 100644
--- a/makefiles/version.mk
+++ b/makefiles/version.mk
@@ -1,6 +1,6 @@
 ##### version
 NCCL_MAJOR := 2
 NCCL_MINOR := 7
-NCCL_PATCH := 5
+NCCL_PATCH := 6
 NCCL_SUFFIX :=
 PKG_REVISION := 1
diff --git a/src/graph/xml.cc b/src/graph/xml.cc
index 2885787..cc91b92 100644
--- a/src/graph/xml.cc
+++ b/src/graph/xml.cc
@@ -640,9 +640,14 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, nvmlDevice_t nvm
     if (index == -1) {
       const char* busId;
       NCCLCHECK(xmlGetAttr(sub, "target", &busId));
-      char* path;
-      NCCLCHECK(getPciPath(busId, &path));
-      NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
+      if (strcmp(busId, "fffffff:ffff:ff") == 0) {
+        // Remote NVLink device is not visible inside this VM. Assume NVSwitch.
+        NCCLCHECK(xmlSetAttr(sub, "tclass", "0x068000"));
+      } else {
+        char* path;
+        NCCLCHECK(getPciPath(busId, &path));
+        NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
+      }
     }
   }
   *gpuNodeRet = gpuNode;