Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sylvain Jeaugey <sjeaugey@nvidia.com>, 2016-04-19 20:47:27 +0300
committer: Sylvain Jeaugey <sjeaugey@nvidia.com>, 2016-04-19 20:47:27 +0300
commit: dba3ec94281c052b943eb24ddee7cc3691caeaaf (patch)
tree: 8d6fe772e372cf4842db279e16b4cf5480503c35
parent: 9de361a1b97b55f1669107e94efc95c1a299234c (diff)
Fix random deadlock during ncclCommInitRank.
-rw-r--r-- src/core.cu | 4
1 file changed, 2 insertions, 2 deletions
diff --git a/src/core.cu b/src/core.cu
index 6adeb36..cec2794 100644
--- a/src/core.cu
+++ b/src/core.cu
@@ -188,7 +188,7 @@ static void syncRingDirect(RankGather* gather, int* ringDirectOk) {
swapped = __sync_bool_compare_and_swap(&gather->bar, bar_tmp, bar_tmp+1);
} while(!swapped);
- while (gather->bar != 2*ndev) // Wait for all ranks to arrive at this second barrier
+ while (gather->bar < 2*ndev) // Wait for all ranks to arrive at this second barrier
sched_yield();
__sync_synchronize();
@@ -203,7 +203,7 @@ static ncclResult_t closeGather(RankGather* gather, int ndev) {
swapped = __sync_bool_compare_and_swap(&gather->bar, bar_tmp, bar_tmp+1);
} while(!swapped);
- while (gather->bar != 3*ndev) // Wait for all ranks to arrive at this third barrier
+ while (gather->bar < 3*ndev) // Wait for all ranks to arrive at this third barrier
sched_yield();
__sync_synchronize();