diff options
Diffstat (limited to 'src/graph/trees.cc')
-rw-r--r-- | src/graph/trees.cc | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/src/graph/trees.cc b/src/graph/trees.cc new file mode 100644 index 0000000..722e61b --- /dev/null +++ b/src/graph/trees.cc @@ -0,0 +1,106 @@ +/************************************************************************* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "nccl.h" + +#define RANK_TO_INDEX(r) (rank > root ? rank-1 : rank) + +/* Btree which alternates leaves and nodes. + * Assumes root is 0, which conveniently builds a tree on powers of two, + * (because we have pow2-1 ranks) which lets us manipulate bits. + * Find first non-zero bit, then : + * Find the parent : + * xx01[0] -> xx10[0] (1,5,9 below) or xx00[0] if xx10[0] is out of bounds (13 below) + * xx11[0] -> xx10[0] (3,7,11 below) + * Find the children : + * xx10[0] -> xx01[0] (2,4,6,8,10,12) or -1 (1,3,5,7,9,11,13) + * xx10[0] -> xx11[0] (2,4,6,8,10) or xx101[0] (12) or xx1001[0] ... or -1 (1,3,5,7,9,11,13) + * + * Illustration : + * 0---------------8 + * ______/ \______ + * 4 12 + * / \ / \ + * 2 6 10 \ + * / \ / \ / \ \ + * 1 3 5 7 9 11 13 + */ +ncclResult_t ncclGetBtree(int nranks, int rank, int* u, int* d0, int* d1) { + int up, down0, down1; + int bit; + for (bit=1; bit<nranks; bit<<=1) { + if (bit & rank) break; + } + + if (rank == 0) { + *u = -1; + *d0 = nranks > 1 ? bit >> 1 : -1; + *d1 = -1; + return ncclSuccess; + } + + up = (rank ^ bit) | (bit << 1); + if (up >= nranks) up = (rank ^ bit); + *u = up; + + int lowbit = bit >> 1; + // down0 is always within bounds + down0 = lowbit == 0 ? -1 : rank-lowbit; + + down1 = lowbit == 0 ? -1 : rank+lowbit; + // Make sure down1 is within bounds + while (down1 >= nranks) { + down1 = lowbit == 0 ? -1 : rank+lowbit; + lowbit >>= 1; + } + *d0 = down0; *d1 = down1; + + return ncclSuccess; +} + +/* Build a double binary tree. Take the previous tree for the first tree. + * For the second tree, we use a mirror tree (if nranks is odd) + * + * 8---------0---------5 + * ______/ \______ _____/ \______ + * 4 12 1 9 + * / \ / \ / \ + * 2 6 10 3 7 10 + * / \ / \ / \ / \ / \ / \ + * 1 3 5 7 9 11 2 4 6 8 11 12 + * + * or shift it by one rank (if nranks is even) + * + * 8---------0--------------9 + * ______/ \ ______/ \ + * 4 \ 5 \ + * / \ \ / \ \ + * 2 6 10 3 7 11 + * / \ / \ / \ / \ / \ / \ + * 1 3 5 7 9 11 2 4 6 8 10 1 + */ +ncclResult_t ncclGetDtree(int nranks, int rank, int* s0, int* d0_0, int* d0_1, int* s1, int* d1_0, int* d1_1) { + // First tree ... use a btree + ncclGetBtree(nranks, rank, s0, d0_0, d0_1); + // Second tree ... mirror or shift + if (nranks % 2 == 0) { + // shift + int shiftrank = (rank-1+nranks) % nranks; + int u, d0, d1; + ncclGetBtree(nranks, shiftrank, &u, &d0, &d1); + *s1 = u == -1 ? -1 : (u+1) % nranks; + *d1_0 = d0 == -1 ? -1 : (d0+1) % nranks; + *d1_1 = d1 == -1 ? -1 : (d1+1) % nranks; + } else { + // mirror + int u, d0, d1; + ncclGetBtree(nranks, nranks-1-rank, &u, &d0, &d1); + *s1 = u == -1 ? -1 : nranks-1-u; + *d1_0 = d0 == -1 ? -1 : nranks-1-d0; + *d1_1 = d1 == -1 ? -1 : nranks-1-d1; + } + return ncclSuccess; +} |