Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/graph/trees.cc')
-rw-r--r--src/graph/trees.cc106
1 files changed, 106 insertions, 0 deletions
diff --git a/src/graph/trees.cc b/src/graph/trees.cc
new file mode 100644
index 0000000..722e61b
--- /dev/null
+++ b/src/graph/trees.cc
@@ -0,0 +1,106 @@
+/*************************************************************************
+ * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * See LICENSE.txt for license information
+ ************************************************************************/
+
+#include "nccl.h"
+
+#define RANK_TO_INDEX(r) (rank > root ? rank-1 : rank)
+
+/* Btree which alternates leaves and nodes.
+ * Assumes root is 0, which conveniently builds a tree on powers of two,
+ * (because we have pow2-1 ranks) which lets us manipulate bits.
+ * Find first non-zero bit, then :
+ * Find the parent :
+ * xx01[0] -> xx10[0] (1,5,9 below) or xx00[0] if xx10[0] is out of bounds (13 below)
+ * xx11[0] -> xx10[0] (3,7,11 below)
+ * Find the children :
+ * xx10[0] -> xx01[0] (2,4,6,8,10,12) or -1 (1,3,5,7,9,11,13)
+ * xx10[0] -> xx11[0] (2,4,6,8,10) or xx101[0] (12) or xx1001[0] ... or -1 (1,3,5,7,9,11,13)
+ *
+ * Illustration :
+ * 0---------------8
+ * ______/ \______
+ * 4 12
+ * / \ / \
+ * 2 6 10 \
+ * / \ / \ / \ \
+ * 1 3 5 7 9 11 13
+ */
+ncclResult_t ncclGetBtree(int nranks, int rank, int* u, int* d0, int* d1) {
+ int up, down0, down1;
+ int bit;
+ for (bit=1; bit<nranks; bit<<=1) {
+ if (bit & rank) break;
+ }
+
+ if (rank == 0) {
+ *u = -1;
+ *d0 = nranks > 1 ? bit >> 1 : -1;
+ *d1 = -1;
+ return ncclSuccess;
+ }
+
+ up = (rank ^ bit) | (bit << 1);
+ if (up >= nranks) up = (rank ^ bit);
+ *u = up;
+
+ int lowbit = bit >> 1;
+ // down0 is always within bounds
+ down0 = lowbit == 0 ? -1 : rank-lowbit;
+
+ down1 = lowbit == 0 ? -1 : rank+lowbit;
+ // Make sure down1 is within bounds
+ while (down1 >= nranks) {
+ down1 = lowbit == 0 ? -1 : rank+lowbit;
+ lowbit >>= 1;
+ }
+ *d0 = down0; *d1 = down1;
+
+ return ncclSuccess;
+}
+
+/* Build a double binary tree. Take the previous tree for the first tree.
+ * For the second tree, we use a mirror tree (if nranks is odd)
+ *
+ * 8---------0---------5
+ * ______/ \______ _____/ \______
+ * 4 12 1 9
+ * / \ / \ / \
+ * 2 6 10 3 7 10
+ * / \ / \ / \ / \ / \ / \
+ * 1 3 5 7 9 11 2 4 6 8 11 12
+ *
+ * or shift it by one rank (if nranks is even)
+ *
+ * 8---------0--------------9
+ * ______/ \ ______/ \
+ * 4 \ 5 \
+ * / \ \ / \ \
+ * 2 6 10 3 7 11
+ * / \ / \ / \ / \ / \ / \
+ * 1 3 5 7 9 11 2 4 6 8 10 1
+ */
+ncclResult_t ncclGetDtree(int nranks, int rank, int* s0, int* d0_0, int* d0_1, int* s1, int* d1_0, int* d1_1) {
+ // First tree ... use a btree
+ ncclGetBtree(nranks, rank, s0, d0_0, d0_1);
+ // Second tree ... mirror or shift
+ if (nranks % 2 == 0) {
+ // shift
+ int shiftrank = (rank-1+nranks) % nranks;
+ int u, d0, d1;
+ ncclGetBtree(nranks, shiftrank, &u, &d0, &d1);
+ *s1 = u == -1 ? -1 : (u+1) % nranks;
+ *d1_0 = d0 == -1 ? -1 : (d0+1) % nranks;
+ *d1_1 = d1 == -1 ? -1 : (d1+1) % nranks;
+ } else {
+ // mirror
+ int u, d0, d1;
+ ncclGetBtree(nranks, nranks-1-rank, &u, &d0, &d1);
+ *s1 = u == -1 ? -1 : nranks-1-u;
+ *d1_0 = d0 == -1 ? -1 : nranks-1-d0;
+ *d1_1 = d1 == -1 ? -1 : nranks-1-d1;
+ }
+ return ncclSuccess;
+}