From 195232556936b39b01cc908296e1650b80d4a3e9 Mon Sep 17 00:00:00 2001 From: Sylvain Jeaugey Date: Tue, 23 Jun 2020 14:08:49 -0700 Subject: 2.7.6-1 Fix crash when NVSwitch is not visible inside a VM. --- makefiles/version.mk | 2 +- src/graph/xml.cc | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/makefiles/version.mk b/makefiles/version.mk index 6f3b266..6a1deca 100644 --- a/makefiles/version.mk +++ b/makefiles/version.mk @@ -1,6 +1,6 @@ ##### version NCCL_MAJOR := 2 NCCL_MINOR := 7 -NCCL_PATCH := 5 +NCCL_PATCH := 6 NCCL_SUFFIX := PKG_REVISION := 1 diff --git a/src/graph/xml.cc b/src/graph/xml.cc index 2885787..cc91b92 100644 --- a/src/graph/xml.cc +++ b/src/graph/xml.cc @@ -640,9 +640,14 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, nvmlDevice_t nvm if (index == -1) { const char* busId; NCCLCHECK(xmlGetAttr(sub, "target", &busId)); - char* path; - NCCLCHECK(getPciPath(busId, &path)); - NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass")); + if (strcmp(busId, "fffffff:ffff:ff") == 0) { + // Remote NVLink device is not visible inside this VM. Assume NVSwitch. + NCCLCHECK(xmlSetAttr(sub, "tclass", "0x068000")); + } else { + char* path; + NCCLCHECK(getPciPath(busId, &path)); + NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass")); + } } } *gpuNodeRet = gpuNode; -- cgit v1.2.3