From 0ceaec9cee96ae7658aa45686853286651f36384 Mon Sep 17 00:00:00 2001
From: David Addison
Date: Tue, 16 Apr 2019 15:27:06 -0700
Subject: NCCL 2.4.7-1

Performance tweaks for PowerPC builds only;
Set default NCCL_MIN_NRINGS to 4
Disable PCI-E NUMA distance detection
---
 makefiles/version.mk | 2 +-
 src/misc/rings.cc | 8 +++++++-
 src/misc/topo.cc | 6 ++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/makefiles/version.mk b/makefiles/version.mk
index 7abaaaf..8341f33 100644
--- a/makefiles/version.mk
+++ b/makefiles/version.mk
@@ -1,6 +1,6 @@
 ##### version
 NCCL_MAJOR := 2
 NCCL_MINOR := 4
-NCCL_PATCH := 6
+NCCL_PATCH := 7
 NCCL_SUFFIX :=
 PKG_REVISION := 1
diff --git a/src/misc/rings.cc b/src/misc/rings.cc
index 27ca9b6..7e1fc1b 100644
--- a/src/misc/rings.cc
+++ b/src/misc/rings.cc
@@ -170,7 +170,13 @@ static ncclResult_t fillCoords(int nranks, int* matrix, int* coords, int* rankTo
   }
 }
 
-NCCL_PARAM(MinNrings, "MIN_NRINGS", 0);
+#ifdef __PPC__
+// Make the default NCCL_MIN_NRINGS=4 for IBM/Power nodes
+#define DEFAULT_MIN_NRINGS 4
+#else
+#define DEFAULT_MIN_NRINGS 0
+#endif
+NCCL_PARAM(MinNrings, "MIN_NRINGS", DEFAULT_MIN_NRINGS);
 NCCL_PARAM(MaxNrings, "MAX_NRINGS", 0);
 
 /* Users can force the number of threads with an environment variable */
diff --git a/src/misc/topo.cc b/src/misc/topo.cc
index 6364978..3f5bdf9 100644
--- a/src/misc/topo.cc
+++ b/src/misc/topo.cc
@@ -39,11 +39,17 @@ int pciDistance(char* path1, char* path2) {
     }
   }
   if (score <= 3) {
+#ifdef __PPC__
+    // NUMA distance detection and PATH_SYS not supported on IBM/Power nodes
+    // nodes currently
+    return PATH_NODE;
+#else
     /* Split the former PATH_SOC distance into PATH_NODE and PATH_SYS based on numaId */
     int numaId1 = getNumaId(path1);
     int numaId2 = getNumaId(path2);
     TRACE(NCCL_INIT, "depth %d score %d path1 %s numaId %d path2 %s numaId %d", depth, score, path1, numaId1, path2, numaId2);
     return ((numaId1 == numaId2) ? PATH_NODE : PATH_SYS);
+#endif
   }
   if (score == 4) return PATH_PHB;
   if (score == depth-1) return PATH_PIX;
-- 
cgit v1.2.3