Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/graph/connect.cc')
-rw-r--r-- src/graph/connect.cc 190
1 files changed, 107 insertions, 83 deletions
diff --git a/src/graph/connect.cc b/src/graph/connect.cc
index af481d2..a0f1265 100644
--- a/src/graph/connect.cc
+++ b/src/graph/connect.cc
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -14,7 +14,7 @@
/******************************************************************/
ncclResult_t ncclTopoPreset(struct ncclComm* comm,
- struct ncclTopoGraph* treeGraph, struct ncclTopoGraph* ringGraph,
+ struct ncclTopoGraph* treeGraph, struct ncclTopoGraph* ringGraph, struct ncclTopoGraph* collNetGraph,
struct ncclTopoRanks* topoRanks) {
int rank = comm->rank;
int localRanks = comm->localRanks;
@@ -23,13 +23,14 @@ ncclResult_t ncclTopoPreset(struct ncclComm* comm,
for (int c=0; c<nChannels; c++) {
struct ncclChannel* channel = comm->channels+c;
channel->ring.prev = channel->ring.next = -1;
- channel->treeUp.up = -1;
- for (int i=0; i<NCCL_MAX_TREE_ARITY; i++) channel->treeUp.down[i] = -1;
- channel->treeDn.up = -1;
- for (int i=0; i<NCCL_MAX_TREE_ARITY; i++) channel->treeDn.down[i] = -1;
+ channel->tree.up = -1;
+ for (int i=0; i<NCCL_MAX_TREE_ARITY; i++) channel->tree.down[i] = -1;
+ channel->collTree.up = -1;
+ for (int i=0; i<NCCL_MAX_TREE_ARITY; i++) channel->collTree.down[i] = -1;
int* ringIntra = ringGraph->intra+c*localRanks;
int* treeIntra = treeGraph->intra+c*localRanks;
+ int* collNetIntra = collNetGraph->intra+c*localRanks;
for (int i=0; i<localRanks; i++) {
if (ringIntra[i] == rank) {
@@ -39,23 +40,21 @@ ncclResult_t ncclTopoPreset(struct ncclComm* comm,
channel->ring.next = (i == localRanks-1) ? -1 : ringIntra[i+1];
}
if (treeIntra[i] == rank) {
- int recvIndex = 0, sendIndex = treeGraph->pattern == NCCL_TOPO_PATTERN_TREE ? 0 : 1;
- int prev = (i-1+localRanks)%localRanks, next = (i+1)%localRanks;
+ int parentIndex = 0;
+ int child0Index = treeGraph->pattern == NCCL_TOPO_PATTERN_TREE ? 0 : 1;
+ int child1Index = treeGraph->pattern == NCCL_TOPO_PATTERN_SPLIT_TREE ? 1 : 0;
- // Tree loop always flows in the same direction. Other trees are symmetric, i.e.
- // up/down go in reverse directions
- int sym = treeGraph->pattern == NCCL_TOPO_PATTERN_SPLIT_TREE_LOOP ? 0 : 1;
+ topoRanks->treeToParent[c] = treeIntra[parentIndex];
+ topoRanks->treeToChild0[c] = treeIntra[child0Index];
+ topoRanks->treeToChild1[c] = treeIntra[child1Index];
+ channel->tree.up = i == 0 ? -1 : treeIntra[i-1];
+ channel->tree.down[0] = i == localRanks-1 ? -1 : treeIntra[i+1];
+ }
+ if (collNetIntra[i] == rank) {
+ int prev = (i-1+localRanks)%localRanks, next = (i+1)%localRanks;
- // Down tree is common
- topoRanks->treeDnRecv[c] = treeIntra[recvIndex];
- topoRanks->treeDnSend[c] = treeIntra[sendIndex];
- channel->treeDn.up = treeIntra[prev];
- channel->treeDn.down[0] = treeIntra[next];
- // Up tree depends on the pattern
- topoRanks->treeUpRecv[c] = sym ? topoRanks->treeDnSend[c] : topoRanks->treeDnRecv[c];
- topoRanks->treeUpSend[c] = sym ? topoRanks->treeDnRecv[c] : topoRanks->treeDnSend[c];
- channel->treeUp.down[0] = sym ? channel->treeDn.down[0] : channel->treeDn.up ;
- channel->treeUp.up = sym ? channel->treeDn.up : channel->treeDn.down[0];
+ channel->collTree.up = collNetIntra[prev];
+ channel->collTree.down[0] = collNetIntra[next];
}
}
topoRanks->ringPrev[c] = channel->ring.prev;
@@ -105,72 +104,100 @@ static ncclResult_t getIndexes(int* ranks, int* indexes, int nNodes, int* firstR
return ncclSuccess;
}
-static ncclResult_t setTreeUp(struct ncclTree* tree0, struct ncclTree* tree1, int* indexes, int u0, int u1) {
- if (u0 != -1) tree0->up = indexes[u0];
- if (u1 != -1) tree1->up = indexes[u1];
+static ncclResult_t setTreeUp(struct ncclTree* tree, int* indexes, int u) {
+ if (u == -1) return ncclSuccess;
+ tree->up = indexes[u];
return ncclSuccess;
}
-static ncclResult_t addRanksDown(int* down, int* indexes, int r0, int r1) {
+static ncclResult_t setTreeDown(struct ncclTree* tree, int* indexes, int d) {
+ if (d == -1) return ncclSuccess;
int x = 0;
- if (down[x] >= 0) x++;
- if (down[x] >= 0) {
- WARN("Internal error : tree already has more than one child (%d %d %d)\n", down[0], down[1], down[2]);
+ while (x < NCCL_MAX_TREE_ARITY && tree->down[x] >= 0) x++;
+ if (x == NCCL_MAX_TREE_ARITY) {
+ WARN("Internal error : tree already has %d children (%d %d %d)\n", x, tree->down[0], tree->down[1], tree->down[2]);
return ncclInternalError;
}
- if (r0 != -1) down[x++] = indexes[r0];
- if (r1 != -1) down[x++] = indexes[r1];
- return ncclSuccess;
-}
-
-static ncclResult_t setTreeDown(struct ncclTree* tree0, struct ncclTree* tree1, int* indexes, int d0_0, int d0_1, int d1_0, int d1_1) {
- NCCLCHECK(addRanksDown(tree0->down, indexes, d0_0, d0_1));
- NCCLCHECK(addRanksDown(tree1->down, indexes, d1_0, d1_1));
+ tree->down[x] = indexes[d];
return ncclSuccess;
}
-static ncclResult_t openRing(struct ncclTree* tree, int rank, int upRank) {
- if (tree->down[0] == upRank) tree->down[0] = -1;
- if (rank == upRank) tree->up = -1;
- return ncclSuccess;
-}
-
-static ncclResult_t connectTrees(struct ncclComm* comm, int* treeUpRecv, int* treeUpSend, int* treeDnRecv, int* treeDnSend, int* firstRanks) {
+static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int* treeToChild0, int* treeToChild1, int* firstRanks, int* treePatterns) {
const int nChannels = comm->nChannels, nNodes = comm->nNodes, node = comm->node;
- int* indexesSend, *indexesRecv;
- NCCLCHECK(ncclCalloc(&indexesSend, nNodes));
- NCCLCHECK(ncclCalloc(&indexesRecv, nNodes));
+ int* ranksToParent, *ranksToChild0, *ranksToChild1;
+ NCCLCHECK(ncclCalloc(&ranksToParent, nNodes));
+ NCCLCHECK(ncclCalloc(&ranksToChild0, nNodes));
+ NCCLCHECK(ncclCalloc(&ranksToChild1, nNodes));
// Compute tree depth. Not an exact value but a good approximation in most
// cases
int depth = comm->nRanks/nNodes - 1 + log2i(nNodes);
- int u0, d0_0, d0_1, u1, d1_0, d1_1;
- NCCLCHECK(ncclGetDtree(nNodes, node, &u0, &d0_0, &d0_1, &u1, &d1_0, &d1_1));
+ int t0u, t0d0, t0d1, t0ChildType, t1u, t1d0, t1d1, t1ChildType;
+ NCCLCHECK(ncclGetDtree(nNodes, node, &t0u, &t0d0, &t0d1, &t0ChildType, &t1u, &t1d0, &t1d1, &t1ChildType));
for (int c=0; c<nChannels; c++) {
struct ncclChannel* channel0 = comm->channels+c;
struct ncclChannel* channel1 = channel0+nChannels;
- NCCLCHECK(getIndexes(treeUpSend+c*comm->nRanks, indexesSend, nNodes, firstRanks));
- NCCLCHECK(getIndexes(treeUpRecv+c*comm->nRanks, indexesRecv, nNodes, firstRanks));
- NCCLCHECK(openRing(&channel0->treeUp, comm->rank, indexesSend[node]));
- NCCLCHECK(openRing(&channel1->treeUp, comm->rank, indexesSend[node]));
- int root = indexesSend[node];
- if (indexesSend[node] == comm->rank) NCCLCHECK(setTreeUp(&channel0->treeUp, &channel1->treeUp, indexesRecv, u0, u1));
- if (indexesRecv[node] == comm->rank) NCCLCHECK(setTreeDown(&channel0->treeUp, &channel1->treeUp, indexesSend, d0_0, d0_1, d1_0, d1_1));
- NCCLCHECK(getIndexes(treeDnSend+c*comm->nRanks, indexesSend, nNodes, firstRanks));
- NCCLCHECK(getIndexes(treeDnRecv+c*comm->nRanks, indexesRecv, nNodes, firstRanks));
- NCCLCHECK(openRing(&channel0->treeDn, comm->rank, u0 == -1 ? root : indexesRecv[node]));
- NCCLCHECK(openRing(&channel1->treeDn, comm->rank, u1 == -1 ? root : indexesRecv[node]));
- if (indexesSend[node] == comm->rank) NCCLCHECK(setTreeDown(&channel0->treeDn, &channel1->treeDn, indexesRecv, d0_0, d0_1, d1_0, d1_1));
- if (indexesRecv[node] == comm->rank) NCCLCHECK(setTreeUp(&channel0->treeDn, &channel1->treeDn, indexesSend, u0, u1));
- TRACE(NCCL_GRAPH, "TreeUp %d : %d -> %d/%d/%d", c, channel0->treeUp.up, channel0->treeUp.down[0], channel0->treeUp.down[1], channel0->treeUp.down[2]);
- TRACE(NCCL_GRAPH, "TreeUp %d : %d -> %d/%d/%d", c+nChannels, channel1->treeUp.up, channel1->treeUp.down[0], channel1->treeUp.down[1], channel1->treeUp.down[2]);
- TRACE(NCCL_GRAPH, "TreeDn %d : %d -> %d/%d/%d", c, channel0->treeDn.up, channel0->treeDn.down[0], channel0->treeDn.down[1], channel0->treeDn.down[2]);
- TRACE(NCCL_GRAPH, "TreeDn %d : %d -> %d/%d/%d", c+nChannels, channel1->treeDn.up, channel1->treeDn.down[0], channel1->treeDn.down[1], channel1->treeDn.down[2]);
- channel0->treeUp.depth = channel1->treeUp.depth = depth;
+ NCCLCHECK(getIndexes(treeToParent+c*comm->nRanks, ranksToParent, nNodes, firstRanks));
+ NCCLCHECK(getIndexes(treeToChild0+c*comm->nRanks, ranksToChild0, nNodes, firstRanks));
+ NCCLCHECK(getIndexes(treeToChild1+c*comm->nRanks, ranksToChild1, nNodes, firstRanks));
+ if (comm->rank == ranksToParent[node]) {
+ NCCLCHECK(setTreeUp(&channel0->tree, t0ChildType == 0 ? ranksToChild0 : ranksToChild1, t0u));
+ NCCLCHECK(setTreeUp(&channel1->tree, t1ChildType == 0 ? ranksToChild0 : ranksToChild1, t1u));
+ }
+ if (comm->rank == ranksToChild0[node]) {
+ NCCLCHECK(setTreeDown(&channel0->tree, ranksToParent, t0d0));
+ NCCLCHECK(setTreeDown(&channel1->tree, ranksToParent, t1d0));
+ }
+ if (comm->rank == ranksToChild1[node]) {
+ NCCLCHECK(setTreeDown(&channel0->tree, ranksToParent, t0d1));
+ NCCLCHECK(setTreeDown(&channel1->tree, ranksToParent, t1d1));
+ }
+ if (comm->rank == ranksToParent[node] ||
+ comm->rank == ranksToChild0[node] ||
+ comm->rank == ranksToChild1[node]) {
+ INFO(NCCL_GRAPH, "Tree %d : %d -> %d -> %d/%d/%d", c, channel0->tree.up, comm->rank, channel0->tree.down[0], channel0->tree.down[1], channel0->tree.down[2]);
+ INFO(NCCL_GRAPH, "Tree %d : %d -> %d -> %d/%d/%d", c+nChannels, channel1->tree.up, comm->rank, channel1->tree.down[0], channel1->tree.down[1], channel1->tree.down[2]);
+ }
+ channel0->tree.depth = channel1->tree.depth = depth;
+ }
+ free(ranksToParent);
+ free(ranksToChild0);
+ free(ranksToChild1);
+ return ncclSuccess;
+}
+
+ncclResult_t ncclTopoConnectCollNet(struct ncclComm* comm, struct ncclTopoGraph* collNetGraph, int rank) {
+ int nranks = comm->nRanks;
+ int depth = nranks/comm->nNodes;
+ int sendIndex = collNetGraph->pattern == NCCL_TOPO_PATTERN_TREE ? 0 : 1; // send GPU index depends on topo pattern
+ int sendEndIndex = (sendIndex+comm->localRanks-1)%comm->localRanks;
+ for (int c=0; c<comm->nChannels/2; c++) {
+ struct ncclChannel* channel = comm->channels+c;
+ // Set root of collTree to id nranks
+ if (rank == collNetGraph->intra[sendIndex+c*comm->localRanks]) { // is master
+ channel->collTree.up = nranks;
+ }
+ if (rank == collNetGraph->intra[sendEndIndex+c*comm->localRanks]) { // is bottom of intra-node chain
+ channel->collTree.down[0] = -1;
+ }
+ channel->collTree.depth = depth;
+ INFO(NCCL_GRAPH, "CollNet Channel %d rank %d up %d down %d", c, rank, channel->collTree.up, channel->collTree.down[0]);
+ }
+ int recvIndex = 0; // recv GPU index is always 0
+ int recvEndIndex = (recvIndex+comm->localRanks-1)%comm->localRanks;
+ for (int c=0; c<comm->nChannels/2; c++) {
+ struct ncclChannel* channel = comm->channels+comm->nChannels/2+c;
+ // Set root of collTree to id nranks
+ if (rank == collNetGraph->intra[recvIndex+c*comm->localRanks]) { // is master
+ channel->collTree.up = nranks;
+ }
+ if (rank == collNetGraph->intra[recvEndIndex+c*comm->localRanks]) { // is bottom of intra-node chain
+ channel->collTree.down[0] = -1;
+ }
+ channel->collTree.depth = depth;
+ INFO(NCCL_GRAPH, "CollNet Channel %d rank %d up %d down %d", comm->nChannels/2+c, rank, channel->collTree.up, channel->collTree.down[0]);
}
- free(indexesSend);
- free(indexesRecv);
return ncclSuccess;
}
@@ -204,35 +231,33 @@ int ncclMaxNchannels() {
return maxNchannels;
}
-ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct ncclTopoRanks** allTopoRanks, int* rings) {
+ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePatterns, struct ncclTopoRanks** allTopoRanks, int* rings) {
// Gather data from all ranks
- int *ringRecv, *ringSend, *ringPrev, *ringNext, *treeUpRecv, *treeUpSend, *treeDnRecv,*treeDnSend;
+ int *ringRecv, *ringSend, *ringPrev, *ringNext, *treeToParent, *treeToChild0, *treeToChild1;
int nranks = comm->nRanks;
int nChannels = comm->nChannels;
NCCLCHECK(ncclCalloc(&ringRecv, nranks*MAXCHANNELS));
NCCLCHECK(ncclCalloc(&ringSend, nranks*MAXCHANNELS));
NCCLCHECK(ncclCalloc(&ringPrev, nranks*MAXCHANNELS));
NCCLCHECK(ncclCalloc(&ringNext, nranks*MAXCHANNELS));
- NCCLCHECK(ncclCalloc(&treeUpRecv, nranks*MAXCHANNELS));
- NCCLCHECK(ncclCalloc(&treeUpSend, nranks*MAXCHANNELS));
- NCCLCHECK(ncclCalloc(&treeDnRecv, nranks*MAXCHANNELS));
- NCCLCHECK(ncclCalloc(&treeDnSend, nranks*MAXCHANNELS));
+ NCCLCHECK(ncclCalloc(&treeToParent, nranks*MAXCHANNELS));
+ NCCLCHECK(ncclCalloc(&treeToChild0, nranks*MAXCHANNELS));
+ NCCLCHECK(ncclCalloc(&treeToChild1, nranks*MAXCHANNELS));
for (int i=0; i<nranks; i++) {
for (int c=0; c<nChannels;c++) {
ringRecv[c*nranks+i] = allTopoRanks[i]->ringRecv[c];
ringSend[c*nranks+i] = allTopoRanks[i]->ringSend[c];
ringPrev[c*nranks+i] = allTopoRanks[i]->ringPrev[c];
ringNext[c*nranks+i] = allTopoRanks[i]->ringNext[c];
- treeUpRecv[c*nranks+i] = allTopoRanks[i]->treeUpRecv[c];
- treeUpSend[c*nranks+i] = allTopoRanks[i]->treeUpSend[c];
- treeDnRecv[c*nranks+i] = allTopoRanks[i]->treeDnRecv[c];
- treeDnSend[c*nranks+i] = allTopoRanks[i]->treeDnSend[c];
+ treeToParent[c*nranks+i] = allTopoRanks[i]->treeToParent[c];
+ treeToChild0[c*nranks+i] = allTopoRanks[i]->treeToChild0[c];
+ treeToChild1[c*nranks+i] = allTopoRanks[i]->treeToChild1[c];
}
}
// Connect rings and trees. This should also duplicate the channels.
NCCLCHECK(connectRings(comm, ringRecv, ringSend, ringPrev, ringNext, firstRanks));
- NCCLCHECK(connectTrees(comm, treeUpRecv, treeUpSend, treeDnRecv, treeDnSend, firstRanks));
+ NCCLCHECK(connectTrees(comm, treeToParent, treeToChild0, treeToChild1, firstRanks, treePatterns));
// Duplicate ringPrev/ringNext for ncclBuildRing
memcpy(ringPrev+nChannels*nranks, ringPrev, nChannels*nranks*sizeof(int));
@@ -259,10 +284,9 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct nccl
free(ringSend);
free(ringPrev);
free(ringNext);
- free(treeUpRecv);
- free(treeUpSend);
- free(treeDnRecv);
- free(treeDnSend);
+ free(treeToParent);
+ free(treeToChild0);
+ free(treeToChild1);
return ncclSuccess;
}