diff options
-rw-r--r-- | makefiles/version.mk | 2 | ||||
-rw-r--r-- | src/graph/search.cc | 4 | ||||
-rw-r--r-- | src/graph/tuning.cc | 2 | ||||
-rw-r--r-- | src/group.cc | 5 |
4 files changed, 8 insertions, 5 deletions
diff --git a/makefiles/version.mk b/makefiles/version.mk index 4a82cb9..6f3b266 100644 --- a/makefiles/version.mk +++ b/makefiles/version.mk @@ -1,6 +1,6 @@ ##### version NCCL_MAJOR := 2 NCCL_MINOR := 7 -NCCL_PATCH := 3 +NCCL_PATCH := 5 NCCL_SUFFIX := PKG_REVISION := 1 diff --git a/src/graph/search.cc b/src/graph/search.cc index 42e1bb9..cb52921 100644 --- a/src/graph/search.cc +++ b/src/graph/search.cc @@ -774,8 +774,8 @@ done: int dupChannels = std::min(graph->nChannels*2, graph->maxChannels); memcpy(graph->intra+graph->nChannels*ngpus, graph->intra, (dupChannels-graph->nChannels)*ngpus*sizeof(int)); memcpy(graph->inter+graph->nChannels*2,graph->inter, (dupChannels-graph->nChannels)*2*sizeof(int)); - graph->speedIntra /= 2; - graph->speedInter /= 2; + graph->speedIntra /= DIVUP(dupChannels, graph->nChannels); + graph->speedInter /= DIVUP(dupChannels, graph->nChannels); graph->nChannels = dupChannels; } return ncclSuccess; diff --git a/src/graph/tuning.cc b/src/graph/tuning.cc index 29424b0..62f50ef 100644 --- a/src/graph/tuning.cc +++ b/src/graph/tuning.cc @@ -107,9 +107,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) { float speed = comm->nNodes <= 2 || a == NCCL_ALGO_COLLNET ? graphs[a]->speedIntra : graphs[a]->speedInter; float busBw = graphs[a]->nChannels * speed; - if (compCap80) busBw *= 0.92; // Various model refinements + if (compCap80) busBw = std::min(busBw, 235.0f); if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) busBw *= (comm->nNodes > 1 || coll == ncclCollAllReduce || coll == ncclCollReduce) ? 1.0/4.0 : 1.0/3.0; if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL128) busBw = std::min(busBw * (ppn < 2 ? 0.7 : 0.92 /*120.0/128.0*/), ll128MaxBwPerCh[coll]*graphs[a]->nChannels); double maxTreeBw = comm->nNodes > 2 ? diff --git a/src/group.cc b/src/group.cc index 549a4fd..5ce4901 100644 --- a/src/group.cc +++ b/src/group.cc @@ -145,7 +145,10 @@ void* ncclAsyncThreadPreconnect(void* args_) { NCCL_API(ncclResult_t, ncclGroupEnd); ncclResult_t ncclGroupEnd() { - if (ncclGroupMode == 0) return ncclInvalidUsage; + if (ncclGroupMode == 0) { + WARN("ncclGroupEnd: not in a group call."); + return ncclInvalidUsage; + } ncclGroupMode--; if (ncclGroupMode > 0) return ncclSuccess; int savedDev; |