Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Nathan Luehr <nluehr@nvidia.com> 2016-07-23 03:29:13 +0300
committer: Nathan Luehr <nluehr@nvidia.com> 2016-07-25 20:10:30 +0300
commit: 55c42ad681e1df00e34281dc69dd2ea6745da149 (patch)
tree: d195556e5f806cb557f74b98dae182a4fdeacfe0 /src
parent: 7a1aa6b563e36f414f6fcc0990bc3523220f5be9 (diff)
Fixed redundant contexts in multi-process apps
Change-Id: If787014450fd281304f0c7baf01d25963e40905d
Diffstat (limited to 'src')
-rw-r--r-- src/core.cu 62
1 files changed, 34 insertions, 28 deletions
diff --git a/src/core.cu b/src/core.cu
index a7170a3..036991a 100644
--- a/src/core.cu
+++ b/src/core.cu
@@ -422,40 +422,46 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
canpeer = 0;
}
- if (canpeer) {
- cudaError_t err;
- err = cudaDeviceEnablePeerAccess(iDev, 0);
- if (err == cudaErrorPeerAccessAlreadyEnabled) {
- cudaGetLastError();
- } else if (err != cudaSuccess) {
- INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n",
- rank, myDev, iRank, iDev);
-
- canpeer = 0;
- }
- }
-
if (iPid == myPid) {
- if (canpeer || myDev == iDev) {
- INFO("rank access %d -> %d via P2P device mem", rank, iRank);
+ if (myDev == iDev) {
+ INFO("rank access %d -> %d via common device", rank, iRank);
comm->ptrs[i].local = ranks[myId].devptr;
comm->ptrs[i].remote = ranks[i].devptr;
comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
- } else { // go through hostmem
- INFO("rank access %d -> %d via zero-copy host mem", rank, iRank);
- if (j <= 2)
- *ringDirectFailed = 1;
- if (cudaHostGetDevicePointer(&comm->ptrs[i].local, ranks[myId].hostptr, 0) != cudaSuccess) {
- WARN("rank %d failed to map zero copy buffer to device", rank);
- commClearMaps(comm);
- return ncclUnhandledCudaError;
+ } else {
+ int peer_enabled = canpeer;
+ if (canpeer) {
+ cudaError_t p2pErr = cudaDeviceEnablePeerAccess(iDev, 0);
+ if (p2pErr == cudaErrorPeerAccessAlreadyEnabled) {
+ cudaGetLastError();
+ } else if (p2pErr != cudaSuccess) {
+ INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n",
+ rank, myDev, iRank, iDev);
+ peer_enabled = 0;
+ }
}
- if (cudaHostGetDevicePointer(&comm->ptrs[i].remote, ranks[i].hostptr, 0) != cudaSuccess) {
- WARN("rank %d failed to map %d's zero copy buffer to device", rank, iRank);
- commClearMaps(comm);
- return ncclUnhandledCudaError;
+
+ if (peer_enabled) {
+ INFO("rank access %d -> %d via P2P device mem", rank, iRank);
+ comm->ptrs[i].local = ranks[myId].devptr;
+ comm->ptrs[i].remote = ranks[i].devptr;
+ comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
+ } else { // go through hostmem
+ INFO("rank access %d -> %d via zero-copy host mem", rank, iRank);
+ if (j <= 2)
+ *ringDirectFailed = 1;
+ if (cudaHostGetDevicePointer(&comm->ptrs[i].local, ranks[myId].hostptr, 0) != cudaSuccess) {
+ WARN("rank %d failed to map zero copy buffer to device", rank);
+ commClearMaps(comm);
+ return ncclUnhandledCudaError;
+ }
+ if (cudaHostGetDevicePointer(&comm->ptrs[i].remote, ranks[i].hostptr, 0) != cudaSuccess) {
+ WARN("rank %d failed to map %d's zero copy buffer to device", rank, iRank);
+ commClearMaps(comm);
+ return ncclUnhandledCudaError;
+ }
+ comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
}
- comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
}
} else { // multi-process!
*ringDirectFailed = 1;