diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-04-19 20:47:27 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-04-19 20:47:27 +0300 |
commit | dba3ec94281c052b943eb24ddee7cc3691caeaaf (patch) | |
tree | 8d6fe772e372cf4842db279e16b4cf5480503c35 | |
parent | 9de361a1b97b55f1669107e94efc95c1a299234c (diff) |
Fix random deadlock during ncclCommInitRank.
-rw-r--r-- | src/core.cu | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/src/core.cu b/src/core.cu
index 6adeb36..cec2794 100644
--- a/src/core.cu
+++ b/src/core.cu
@@ -188,7 +188,7 @@ static void syncRingDirect(RankGather* gather, int* ringDirectOk) {
     swapped = __sync_bool_compare_and_swap(&gather->bar, bar_tmp, bar_tmp+1);
   } while(!swapped);
 
-  while (gather->bar != 2*ndev) // Wait for all ranks to arrive at this second barrier
+  while (gather->bar < 2*ndev) // Wait for all ranks to arrive at this second barrier
     sched_yield();
   __sync_synchronize();
 
@@ -203,7 +203,7 @@ static ncclResult_t closeGather(RankGather* gather, int ndev) {
     swapped = __sync_bool_compare_and_swap(&gather->bar, bar_tmp, bar_tmp+1);
   } while(!swapped);
 
-  while (gather->bar != 3*ndev) // Wait for all ranks to arrive at this third barrier
+  while (gather->bar < 3*ndev) // Wait for all ranks to arrive at this third barrier
     sched_yield();
   __sync_synchronize();
 
```