Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sylvain Jeaugey <sjeaugey@nvidia.com>, 2020-04-17 00:27:50 +0300
committer: Sylvain Jeaugey <sjeaugey@nvidia.com>, 2020-04-17 00:27:50 +0300
commit: 23a9fbb788ff1150496238026bdf21eef741bbfd (patch)
tree: 1150b79f1f1aaf90b51d7c1b0c2c931b36621390
parent: a783484ab5003e96774c12d14f555ef969a410fa (diff)
Improve robustness of PCI detection
Fallback to default values when class/speed is unknown.
-rw-r--r-- src/graph/topo.cc | 6
-rw-r--r-- src/graph/xml.h | 5
2 files changed, 5 insertions, 6 deletions
diff --git a/src/graph/topo.cc b/src/graph/topo.cc
index 5cd8d4e..3767ba9 100644
--- a/src/graph/topo.cc
+++ b/src/graph/topo.cc
@@ -303,8 +303,8 @@ ncclResult_t ncclTopoAddGpu(struct ncclXmlNode* xmlGpu, struct ncclTopoSystem* s
return ncclSuccess;
}
-struct kvDict kvDictPciClass[] = { { "0x060400", PCI }, { "0x068000", NVS }, { "0x068001", CPU }, { "0x03", GPU }, { "0x02", NIC }, { NULL, 0 } };
-struct kvDict kvDictPciGen[] = { { "2.5 GT/s", 15 }, { "5 GT/s", 30 }, { "8 GT/s", 60 }, { "16 GT/s", 120 }, { NULL, 0 } }; // x100 Mbps per lane
+struct kvDict kvDictPciClass[] = { { "0x060400", PCI }, { "0x068000", NVS }, { "0x068001", CPU }, { "0x03", GPU }, { "0x02", NIC }, { NULL, PCI /* Default fallback value */ } };
+struct kvDict kvDictPciGen[] = { { "2.5 GT/s", 15 }, { "5 GT/s", 30 }, { "8 GT/s", 60 }, { "16 GT/s", 120 }, { NULL, 60 /* Default fallback */ } }; // x100 Mbps per lane
ncclResult_t ncclTopoAddPci(struct ncclXmlNode* xmlPci, struct ncclTopoSystem* system, struct ncclTopoNode* parent) {
const char* str;
@@ -356,8 +356,6 @@ ncclResult_t ncclTopoAddPci(struct ncclXmlNode* xmlPci, struct ncclTopoSystem* s
// Manage cases where speed was not indicated in /sys
if (width == 0) width = 16;
- if (strlen(str) == 0 || strcasecmp(str, "Unknown speed") == 0) str = "8 GT/s";
-
NCCLCHECK(kvConvertToInt(str, &speed, kvDictPciGen)); // Values in 100Mbps, per lane (we want GB/s in the end)
NCCLCHECK(ncclTopoConnectNodes(node, parent, LINK_PCI, width*speed/80.0));
diff --git a/src/graph/xml.h b/src/graph/xml.h
index fa04527..22e016f 100644
--- a/src/graph/xml.h
+++ b/src/graph/xml.h
@@ -218,8 +218,9 @@ static ncclResult_t kvConvertToInt(const char* str, int* value, struct kvDict* d
}
d++;
}
- WARN("KV Convert to int : could not find value of '%s' in dictionary", str);
- return ncclInternalError;
+ INFO(NCCL_GRAPH, "KV Convert to int : could not find value of '%s' in dictionary, falling back to %d", str, d->value);
+ *value = d->value;
+ return ncclSuccess;
}
static ncclResult_t kvConvertToStr(int value, const char** str, struct kvDict* dict) {
struct kvDict* d = dict;