Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christian Sigg <csigg@google.com>, 2018-12-13 18:09:12 +0300
committer: Sylvain Jeaugey <sjeaugey@nvidia.com>, 2018-12-14 01:17:58 +0300
commit: 346fc49514741fe97a2707a7daffcf4e1785e0bc (patch)
tree: db521bd27aee7dea6e89a71bb6e8d823d9d591d4
parent: d08e9b5279133dbcc55195bd2eac2f069dda3e6d (diff)
Two temporary workarounds for cuda-clang issues.
-rw-r--r-- src/collectives/device/functions.cu | 8
1 file changed, 8 insertions, 0 deletions
diff --git a/src/collectives/device/functions.cu b/src/collectives/device/functions.cu
index 16f1865..1fb8108 100644
--- a/src/collectives/device/functions.cu
+++ b/src/collectives/device/functions.cu
@@ -56,9 +56,17 @@
// Must be consistent with the ncclFuncSet enum
__device__ ncclKern_t ncclFuncs[ncclCollCount*ncclNumOps*ncclNumTypes*2] = {
+// Don't try to initialize the host shadow copy of this device-side global
+// variable. There is no host pointer to a device-side function, which
+// confuses clang. This will be fixed in the next clang release.
+#if __CUDA_ARCH__
NCCL_FUNCS2B(ncclBroadcast),
NCCL_FUNCS2A(ncclReduce),
NCCL_FUNCS2B(ncclAllGather),
NCCL_FUNCS2A(ncclReduceScatter),
NCCL_FUNCS2A(ncclAllReduce)
+#endif
};
+
+// Workaround for https://reviews.llvm.org/D55580
+__device__ void ncclWorkaroundClangD55580() {}