diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-06-17 02:50:14 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-06-17 03:04:41 +0300 |
commit | e51e9229245146a9c348ff63257f33fae0370cb0 (patch) | |
tree | 68f828f947f39188b013c5c86b4b3477b1253b7c /src | |
parent | 9fcc5234859643f0e9ceccdfa690ea5d0fd4ce14 (diff) |
Add a debug level to print NCCL and CUDA versions at init
Diffstat (limited to 'src')
-rw-r--r-- | src/core.cu | 15 | ||||
-rw-r--r-- | src/core.h | 2 |
2 files changed, 16 insertions, 1 deletions
```diff
diff --git a/src/core.cu b/src/core.cu
index cec2794..a7170a3 100644
--- a/src/core.cu
+++ b/src/core.cu
@@ -501,6 +501,8 @@ static void initDebug() {
   const char* nccl_debug = getenv("NCCL_DEBUG");
   if (nccl_debug == NULL) {
     ncclDebugLevel = NONE;
+  } else if (strcmp(nccl_debug, "VERSION") == 0) {
+    ncclDebugLevel = VERSION;
   } else if (strcmp(nccl_debug, "WARN") == 0) {
     ncclDebugLevel = WARN;
   } else if (strcmp(nccl_debug, "INFO") == 0) {
@@ -654,8 +656,19 @@ static ncclResult_t commUnlinkHostMem(ncclComm_t comm, ncclUniqueId commId, int
   return shmUnlink(rankname);
 }
 
+static void showVersion() {
+  static int shown = 0;
+  if (shown == 0 && ncclDebugLevel >= VERSION) {
+    printf("NCCL version %d.%d.%d compiled with CUDA %d.%d\n", NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH, CUDA_MAJOR, CUDA_MINOR);
+    fflush(stdout); \
+    shown = 1;
+  }
+}
+
 extern "C" DSOGLOBAL
 ncclResult_t ncclCommInitRank(ncclComm_t* newcomm, int ndev, ncclUniqueId commId, int myrank) {
+  if (myrank == 0) showVersion();
+
   if (strlen(commId.internal) < 1 ||
       strlen(commId.internal) >= NCCL_UNIQUE_ID_BYTES) {
     WARN("rank %d invalid commId", myrank);
@@ -735,6 +748,8 @@ extern "C" DSOGLOBAL
 ncclResult_t ncclCommInitAll(ncclComm_t* comms, int ndev, int* devlist) {
   initDebug();
 
+  showVersion();
+
   ncclResult_t res;
   int savedDevice;
   RankEntry* ranks = NULL;
diff --git a/src/core.h b/src/core.h
--- a/src/core.h
+++ b/src/core.h
@@ -110,7 +110,7 @@ struct ncclComm {
   ncclNodeRef ptrs[1];
 };
 
-typedef enum {NONE=0, WARN=1, INFO=2, ABORT=3} DebugLevel;
+typedef enum {NONE=0, VERSION=1, WARN=2, INFO=3, ABORT=4} DebugLevel;
 extern DebugLevel ncclDebugLevel;
 
 #define WARN(...) do { \
```