Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sylvain Jeaugey <sjeaugey@nvidia.com> 2020-06-22 19:36:20 +0300
committer Sylvain Jeaugey <sjeaugey@nvidia.com> 2020-06-27 00:39:49 +0300
commit 01afd20a77b5804e0ecf1042509dd9d20ebf9e93 (patch)
tree 04b04147f6e616b9cc9f282c341ffd0693d142c4
parent 5949d96f36d050e59d05872f8bbffd2549318e95 (diff)
2.7.5-1
Minor fixes for A100 platforms. Add a WARN for invalid GroupEnd call.
-rw-r--r-- makefiles/version.mk 2
-rw-r--r-- src/graph/search.cc 4
-rw-r--r-- src/graph/tuning.cc 2
-rw-r--r-- src/group.cc 5
4 files changed, 8 insertions, 5 deletions
diff --git a/makefiles/version.mk b/makefiles/version.mk
index 4a82cb9..6f3b266 100644
--- a/makefiles/version.mk
+++ b/makefiles/version.mk
@@ -1,6 +1,6 @@
##### version
NCCL_MAJOR := 2
NCCL_MINOR := 7
-NCCL_PATCH := 3
+NCCL_PATCH := 5
NCCL_SUFFIX :=
PKG_REVISION := 1
diff --git a/src/graph/search.cc b/src/graph/search.cc
index 42e1bb9..cb52921 100644
--- a/src/graph/search.cc
+++ b/src/graph/search.cc
@@ -774,8 +774,8 @@ done:
int dupChannels = std::min(graph->nChannels*2, graph->maxChannels);
memcpy(graph->intra+graph->nChannels*ngpus, graph->intra, (dupChannels-graph->nChannels)*ngpus*sizeof(int));
memcpy(graph->inter+graph->nChannels*2,graph->inter, (dupChannels-graph->nChannels)*2*sizeof(int));
- graph->speedIntra /= 2;
- graph->speedInter /= 2;
+ graph->speedIntra /= DIVUP(dupChannels, graph->nChannels);
+ graph->speedInter /= DIVUP(dupChannels, graph->nChannels);
graph->nChannels = dupChannels;
}
return ncclSuccess;
diff --git a/src/graph/tuning.cc b/src/graph/tuning.cc
index 29424b0..62f50ef 100644
--- a/src/graph/tuning.cc
+++ b/src/graph/tuning.cc
@@ -107,9 +107,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) {
float speed = comm->nNodes <= 2 || a == NCCL_ALGO_COLLNET ? graphs[a]->speedIntra : graphs[a]->speedInter;
float busBw = graphs[a]->nChannels * speed;
- if (compCap80) busBw *= 0.92;
// Various model refinements
+ if (compCap80) busBw = std::min(busBw, 235.0f);
if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) busBw *= (comm->nNodes > 1 || coll == ncclCollAllReduce || coll == ncclCollReduce) ? 1.0/4.0 : 1.0/3.0;
if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL128) busBw = std::min(busBw * (ppn < 2 ? 0.7 : 0.92 /*120.0/128.0*/), ll128MaxBwPerCh[coll]*graphs[a]->nChannels);
double maxTreeBw = comm->nNodes > 2 ?
diff --git a/src/group.cc b/src/group.cc
index 549a4fd..5ce4901 100644
--- a/src/group.cc
+++ b/src/group.cc
@@ -145,7 +145,10 @@ void* ncclAsyncThreadPreconnect(void* args_) {
NCCL_API(ncclResult_t, ncclGroupEnd);
ncclResult_t ncclGroupEnd() {
- if (ncclGroupMode == 0) return ncclInvalidUsage;
+ if (ncclGroupMode == 0) {
+ WARN("ncclGroupEnd: not in a group call.");
+ return ncclInvalidUsage;
+ }
ncclGroupMode--;
if (ncclGroupMode > 0) return ncclSuccess;
int savedDev;