NCCL 2.4.6-1

Added detection of IBM/Power NVLink bridge device. Add NUMA support to PCI distance calculations. Added NCCL_IGNORE_CPU_AFFINITY env var. Fix memory leaks (GithubIssue#180). Compiler warning fix (GithubIssue#178). Replace non-standard variable length arrays (GithubIssue#171). Fix Tree+Shared Memory crash (GithubPR#185). Fix LL cleanup hang during long-running DL jobs. Fix NCCL_RINGS environment variable handling. Added extra checks to catch repeat calls to ncclCommDestroy() (GithubIssue#191). Improve bootstrap socket connection reliability at scale. Fix hostname hashing issue (GithubIssue#187). Code cleanup to rename all non-device files from *.cu to *.cc.
author: David Addison <daddison@nvidia.com> 2019-03-15 05:39:20 +0300
committer: David Addison <daddison@nvidia.com> 2019-04-05 23:05:45 +0300
commit: f40ce73e8987d2990e4b9ef6c75f4b3423acce78 (patch)
tree: 8df24e6ebc127a82a6562eb60fc6e80590bb3c55 /src/collectives/broadcast.cc
parent: 14e0cf644b9ba2214f2b6d2e299e8218f6145d32 (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/src/collectives/broadcast.cc b/src/collectives/broadcast.cc
new file mode 100644
index 0000000..042301b
--- /dev/null
+++ b/src/collectives/broadcast.cc
@@ -0,0 +1,26 @@
+/*************************************************************************
+ * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * See LICENSE.txt for license information
+ ************************************************************************/
+
+#include "enqueue.h"
+#include "collectives.h"
+
+NCCL_API(ncclResult_t, ncclBroadcast, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, int root,
+    ncclComm_t comm, cudaStream_t stream); /* NCCL_API is defined outside this file; presumably it declares the exported entry point - TODO confirm */
+ncclResult_t ncclBroadcast(const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, int root,
+    ncclComm_t comm, cudaStream_t stream) { /* Packs the caller's arguments into an ncclInfo and passes it to ncclEnqueueCheck. */
+  struct ncclInfo info = { ncclCollBroadcast, "Broadcast", /* collective identifier followed by a name string; field meanings depend on struct ncclInfo, declared elsewhere */
+    sendbuff, recvbuff, count, datatype, ncclSum, root, comm, stream, /* Args; NOTE(review): ncclSum looks like a placeholder for the reduction-op slot, presumably unused by broadcast - confirm */
+    BROADCAST_CHUNKSTEPS, BROADCAST_SLICESTEPS }; /* step constants are defined outside this file (presumably collectives.h - confirm) */
+  return ncclEnqueueCheck(&info); /* the ncclResult_t from ncclEnqueueCheck is returned to the caller unchanged */
+}
+/* Deprecated original "in place" function, similar to MPI: forwards to ncclBroadcast, passing buff as both sendbuff and recvbuff, and returns its result. */
+NCCL_API(ncclResult_t, ncclBcast, void* buff, size_t count, ncclDataType_t datatype, int root,
+    ncclComm_t comm, cudaStream_t stream);
+ncclResult_t ncclBcast(void* buff, size_t count, ncclDataType_t datatype, int root,
+    ncclComm_t comm, cudaStream_t stream) {
+  return ncclBroadcast(buff, buff, count, datatype, root, comm, stream); /* same pointer for source and destination gives the in-place form */
+}
+
author	David Addison <daddison@nvidia.com>	2019-03-15 05:39:20 +0300
committer	David Addison <daddison@nvidia.com>	2019-04-05 23:05:45 +0300
commit	f40ce73e8987d2990e4b9ef6c75f4b3423acce78 (patch)
tree	8df24e6ebc127a82a6562eb60fc6e80590bb3c55 /src/collectives/broadcast.cc
parent	14e0cf644b9ba2214f2b6d2e299e8218f6145d32 (diff)