Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-06-08 02:27:51 +0300
committer: Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-06-08 02:27:51 +0300
commit: d5e507fc7f9579ae442db80a3f1b96b2b79c9465 (patch)
tree: 6d5d0be836abf06e64c5c1c6366866d6842d03e5 /src
parent: 620491a64915354a827839dd8a2b86651cb0c187 (diff)
Only call the CUDA runtime. That may fix #27.
Diffstat (limited to 'src')
-rw-r--r-- src/core.cu 19
-rw-r--r-- src/libwrap.cu 71
-rw-r--r-- src/libwrap.h 5
3 files changed, 9 insertions, 86 deletions
diff --git a/src/core.cu b/src/core.cu
index cec2794..6d26f87 100644
--- a/src/core.cu
+++ b/src/core.cu
@@ -36,7 +36,6 @@
#include <sched.h>
#include <fcntl.h>
#include <unistd.h>
-#include <cuda.h>
#include <cuda_runtime.h>
#include <string.h>
#include <errno.h>
@@ -110,7 +109,7 @@ typedef struct {
pid_t pid;
ncclMem* hostptr;
ncclMem* devptr;
- CUipcMemHandle devipc;
+ cudaIpcMemHandle_t devipc;
size_t buffSize;
} RankEntry;
@@ -299,7 +298,7 @@ static ncclResult_t populateRankInfo(RankEntry* info, int rank, ncclComm_t comm)
info->buffSize = comm->buffSize;
info->hostptr = comm->hostMem;
info->devptr = comm->devMem;
- if (wrapCuIpcGetMemHandle(&info->devipc, (CUdeviceptr)comm->devMem) != ncclSuccess) {
+ if (cudaIpcGetMemHandle(&info->devipc, (void*)comm->devMem) != cudaSuccess) {
WARN("rank %d failed to open CUDA IPC handle", rank);
return ncclUnhandledCudaError;
}
@@ -321,11 +320,11 @@ static ncclResult_t commClearMaps(ncclComm_t comm) {
case CLEANUP_NONE:
break;
case CLEANUP_CUIPC:
- res = wrapCuIpcCloseMemHandle((CUdeviceptr)comm->ptrs[d].cleanupHandle);
- if (res != ncclSuccess) {
+ cures = cudaIpcCloseMemHandle((void*)comm->ptrs[d].cleanupHandle);
+ if (cures != cudaSuccess) {
WARN("rank %d failed to close IPC handle to rank %d",
comm->userFromRing[comm->ncclId], comm->userFromRing[d]);
- retval = (retval == ncclSuccess) ? res : retval;
+ retval = (retval == ncclSuccess) ? ncclUnhandledCudaError : retval;
}
break;
case CLEANUP_UNMAP:
@@ -333,13 +332,13 @@ static ncclResult_t commClearMaps(ncclComm_t comm) {
if (cures != cudaSuccess) {
WARN("rank %d failed to unregister handle to rank %d",
comm->userFromRing[comm->ncclId], comm->userFromRing[d]);
- retval = (retval == ncclSuccess) ? ncclUnhandledCudaError : retval;
+ retval = (retval == ncclSuccess) ? ncclUnhandledCudaError : retval;
}
res = shmUnmap(comm->ptrs[d].cleanupHandle, offsetof(ncclMem, buff) + comm->buffSize);
if (res != ncclSuccess) {
WARN("rank %d failed to unmap handle to rank %d",
comm->userFromRing[comm->ncclId], comm->userFromRing[d]);
- retval = (retval == ncclSuccess) ? res : retval;
+ retval = (retval == ncclSuccess) ? res : retval;
}
break;
default:
@@ -462,8 +461,8 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
if (canpeer || myDev == iDev) {
INFO("rank access %d -> %d via Ipc P2P device mem", rank, iRank);
comm->ptrs[i].local = ranks[myId].devptr;
- if (wrapCuIpcOpenMemHandle((CUdeviceptr*)(&comm->ptrs[i].remote),
- ranks[i].devipc, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS) != ncclSuccess) {
+ if (cudaIpcOpenMemHandle((void**)(&comm->ptrs[i].remote),
+ ranks[i].devipc, cudaIpcMemLazyEnablePeerAccess) != cudaSuccess) {
WARN("rank %d failed to open Ipc handle to rank %d", rank, iRank);
commClearMaps(comm);
return ncclUnhandledCudaError;
diff --git a/src/libwrap.cu b/src/libwrap.cu
index c4ae737..93cb818 100644
--- a/src/libwrap.cu
+++ b/src/libwrap.cu
@@ -41,12 +41,6 @@ static RetCode (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
static RetCode (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
static const char* (*nvmlInternalErrorString)(RetCode r);
-static CUresult (*cuInternalGetErrorString)(CUresult error, const char** pStr);
-static CUresult (*cuInternalIpcGetMemHandle)(CUipcMemHandle* pHandle, CUdeviceptr dptr);
-static CUresult (*cuInternalIpcOpenMemHandle)(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
-static CUresult (*cuInternalIpcCloseMemHandle)(CUdeviceptr dptr);
-
-
ncclResult_t wrapSymbols(void) {
if (symbolsLoaded)
@@ -93,11 +87,6 @@ ncclResult_t wrapSymbols(void) {
LOAD_SYM(nvmlhandle, "nvmlDeviceClearCpuAffinity", nvmlInternalDeviceClearCpuAffinity);
LOAD_SYM(nvmlhandle, "nvmlErrorString", nvmlInternalErrorString);
- LOAD_SYM(cuhandle, "cuGetErrorString", cuInternalGetErrorString);
- LOAD_SYM(cuhandle, "cuIpcGetMemHandle", cuInternalIpcGetMemHandle);
- LOAD_SYM(cuhandle, "cuIpcOpenMemHandle", cuInternalIpcOpenMemHandle);
- LOAD_SYM(cuhandle, "cuIpcCloseMemHandle", cuInternalIpcCloseMemHandle);
-
symbolsLoaded = 1;
return ncclSuccess;
@@ -109,11 +98,6 @@ ncclResult_t wrapSymbols(void) {
nvmlInternalDeviceSetCpuAffinity = NULL;
nvmlInternalDeviceClearCpuAffinity = NULL;
- cuInternalGetErrorString = NULL;
- cuInternalIpcGetMemHandle = NULL;
- cuInternalIpcOpenMemHandle = NULL;
- cuInternalIpcCloseMemHandle = NULL;
-
if (cuhandle != NULL) dlclose(cuhandle);
if (nvmlhandle != NULL) dlclose(nvmlhandle);
return ncclSystemError;
@@ -203,58 +187,3 @@ ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
}
return ncclSuccess;
}
-
-ncclResult_t wrapCuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) {
- if (cuInternalIpcGetMemHandle == NULL) {
- WARN("lib wrapper not initilaized.");
- return ncclLibWrapperNotSet;
- }
- CUresult ret = cuInternalIpcGetMemHandle(pHandle, dptr);
- if (ret != CUDA_SUCCESS) {
- const char* reason = NULL;
- cuInternalGetErrorString(ret, &reason);
- if (reason != NULL)
- WARN("cuInternalIpcGetMemHandle() failed: %s ", reason);
- else
- WARN("cuInternalIpcGetMemHandle() failed: %d ", ret);
- return ncclSystemError;
- }
- return ncclSuccess;
-}
-
-ncclResult_t wrapCuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags) {
- if (cuInternalIpcOpenMemHandle == NULL) {
- WARN("lib wrapper not initilaized.");
- return ncclLibWrapperNotSet;
- }
- CUresult ret = cuInternalIpcOpenMemHandle(pdptr, handle, Flags);
- if (ret != CUDA_SUCCESS) {
- const char* reason = NULL;
- cuInternalGetErrorString(ret, &reason);
- if (reason != NULL)
- WARN("cuInternalIpcOpenMemHandle() failed: %s ", reason);
- else
- WARN("cuInternalIpcOpenMemHandle() failed: %d ", ret);
- return ncclSystemError;
- }
- return ncclSuccess;
-}
-
-ncclResult_t wrapCuIpcCloseMemHandle(CUdeviceptr dptr) {
- if (cuInternalIpcCloseMemHandle == NULL) {
- WARN("lib wrapper not initilaized.");
- return ncclLibWrapperNotSet;
- }
- CUresult ret = cuInternalIpcCloseMemHandle(dptr);
- if (ret != CUDA_SUCCESS) {
- const char* reason = NULL;
- cuInternalGetErrorString(ret, &reason);
- if (reason != NULL)
- WARN("cuInternalIpcCloseMemHandle() failed: %s ", reason);
- else
- WARN("cuInternalIpcCloseMemHandle() failed: %d ", ret);
- return ncclSystemError;
- }
- return ncclSuccess;
-}
-
diff --git a/src/libwrap.h b/src/libwrap.h
index ad63f1e..b89f54d 100644
--- a/src/libwrap.h
+++ b/src/libwrap.h
@@ -33,7 +33,6 @@
#define SRC_LIBWRAP_H_
#include "core.h"
-#include "cuda.h"
typedef struct nvmlDevice_st* nvmlDevice_t;
@@ -46,9 +45,5 @@ ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
-ncclResult_t wrapCuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
-ncclResult_t wrapCuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
-ncclResult_t wrapCuIpcCloseMemHandle(CUdeviceptr dptr);
-
#endif // End include guard