diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2020-06-22 19:36:20 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2020-06-27 00:39:49 +0300 |
commit | 01afd20a77b5804e0ecf1042509dd9d20ebf9e93 (patch) | |
tree | 04b04147f6e616b9cc9f282c341ffd0693d142c4 | |
parent | 5949d96f36d050e59d05872f8bbffd2549318e95 (diff) |
2.7.5-1
Minor fixes for A100 platforms.
Add a WARN for invalid GroupEnd call.
-rw-r--r-- | makefiles/version.mk | 2 |
-rw-r--r-- | src/graph/search.cc | 4 |
-rw-r--r-- | src/graph/tuning.cc | 2 |
-rw-r--r-- | src/group.cc | 5 |
4 files changed, 8 insertions, 5 deletions
diff --git a/makefiles/version.mk b/makefiles/version.mk
index 4a82cb9..6f3b266 100644
--- a/makefiles/version.mk
+++ b/makefiles/version.mk
@@ -1,6 +1,6 @@
 ##### version
 NCCL_MAJOR := 2
 NCCL_MINOR := 7
-NCCL_PATCH := 3
+NCCL_PATCH := 5
 NCCL_SUFFIX :=
 PKG_REVISION := 1
diff --git a/src/graph/search.cc b/src/graph/search.cc
index 42e1bb9..cb52921 100644
--- a/src/graph/search.cc
+++ b/src/graph/search.cc
@@ -774,8 +774,8 @@ done:
 int dupChannels = std::min(graph->nChannels*2, graph->maxChannels);
 memcpy(graph->intra+graph->nChannels*ngpus, graph->intra, (dupChannels-graph->nChannels)*ngpus*sizeof(int));
 memcpy(graph->inter+graph->nChannels*2,graph->inter, (dupChannels-graph->nChannels)*2*sizeof(int));
- graph->speedIntra /= 2;
- graph->speedInter /= 2;
+ graph->speedIntra /= DIVUP(dupChannels, graph->nChannels);
+ graph->speedInter /= DIVUP(dupChannels, graph->nChannels);
 graph->nChannels = dupChannels;
 }
 return ncclSuccess;
diff --git a/src/graph/tuning.cc b/src/graph/tuning.cc
index 29424b0..62f50ef 100644
--- a/src/graph/tuning.cc
+++ b/src/graph/tuning.cc
@@ -107,9 +107,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
 for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) {
 float speed = comm->nNodes <= 2 || a == NCCL_ALGO_COLLNET ? graphs[a]->speedIntra : graphs[a]->speedInter;
 float busBw = graphs[a]->nChannels * speed;
- if (compCap80) busBw *= 0.92;

 // Various model refinements
+ if (compCap80) busBw = std::min(busBw, 235.0f);
 if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) busBw *= (comm->nNodes > 1 || coll == ncclCollAllReduce || coll == ncclCollReduce) ? 1.0/4.0 : 1.0/3.0;
 if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL128) busBw = std::min(busBw * (ppn < 2 ? 0.7 : 0.92 /*120.0/128.0*/), ll128MaxBwPerCh[coll]*graphs[a]->nChannels);
 double maxTreeBw = comm->nNodes > 2 ?
diff --git a/src/group.cc b/src/group.cc
index 549a4fd..5ce4901 100644
--- a/src/group.cc
+++ b/src/group.cc
@@ -145,7 +145,10 @@ void* ncclAsyncThreadPreconnect(void* args_) {

 NCCL_API(ncclResult_t, ncclGroupEnd);
 ncclResult_t ncclGroupEnd() {
- if (ncclGroupMode == 0) return ncclInvalidUsage;
+ if (ncclGroupMode == 0) {
+ WARN("ncclGroupEnd: not in a group call.");
+ return ncclInvalidUsage;
+ }
 ncclGroupMode--;
 if (ncclGroupMode > 0) return ncclSuccess;
 int savedDev;