Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Addison <daddison@nvidia.com>2019-03-15 05:39:20 +0300
committerDavid Addison <daddison@nvidia.com>2019-04-05 23:05:45 +0300
commitf40ce73e8987d2990e4b9ef6c75f4b3423acce78 (patch)
tree8df24e6ebc127a82a6562eb60fc6e80590bb3c55 /src/collectives/device/reduce.h
parent14e0cf644b9ba2214f2b6d2e299e8218f6145d32 (diff)
NCCL 2.4.6-1
Added detection of IBM/Power NVLink bridge device. Add NUMA support to PCI distance calculations. Added NCCL_IGNORE_CPU_AFFINITY env var. Fix memory leaks; GithubIssue#180 Compiler warning fix; GithubIssue#178 Replace non-standard variable length arrays. GithubIssue#171 Fix Tree+Shared Memory crash. GithubPR#185 Fix LL cleanup hang during long running DL jobs. Fix NCCL_RINGS environment variable handling. Added extra checks to catch repeat calls to ncclCommDestroy() GithubIssue#191 Improve bootstrap socket connection reliability at scale. Fix hostname hashing issue. GithubIssue#187 Code cleanup to rename all non device files from *.cu to *.cc
Diffstat (limited to 'src/collectives/device/reduce.h')
-rw-r--r--src/collectives/device/reduce.h8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/collectives/device/reduce.h b/src/collectives/device/reduce.h
index 302d053..d2d5d3b 100644
--- a/src/collectives/device/reduce.h
+++ b/src/collectives/device/reduce.h
@@ -1,10 +1,10 @@
/*************************************************************************
- * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
-#include "core.h"
+#include "devcomm.h"
#include "primitives.h"
#include "collectives.h"
@@ -13,7 +13,7 @@ __device__ void ncclReduceRingKernel(struct CollectiveArgs* args) {
const int tid = threadIdx.x;
const int nthreads = blockDim.x - 1;
const int bid = args->bid;
- struct ncclComm* comm = args->comm;
+ struct ncclDevComm* comm = args->comm;
struct ncclChannel* channel = comm->channels+blockIdx.x;
struct ncclRing* ring = &channel->ring;
const ssize_t size = args->N;
@@ -55,7 +55,7 @@ __device__ void ncclReduceRingLLKernel(struct CollectiveArgs* args) {
const int tid = threadIdx.x;
const int bid = args->bid;
const int nthreads = args->nThreads;
- struct ncclComm* comm = args->comm;
+ struct ncclDevComm* comm = args->comm;
struct ncclChannel* channel = comm->channels+blockIdx.x;
struct ncclRing* ring = &channel->ring;