Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'makefiles/common.mk')
-rw-r--r--makefiles/common.mk8
1 files changed, 5 insertions, 3 deletions
diff --git a/makefiles/common.mk b/makefiles/common.mk
index 2ad5c73..37e81be 100644
--- a/makefiles/common.mk
+++ b/makefiles/common.mk
@@ -25,8 +25,7 @@ CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2)
# Better define NVCC_GENCODE in your environment to the minimal set
# of archs to reduce compile time.
-CUDA8_GENCODE = -gencode=arch=compute_30,code=sm_30 \
- -gencode=arch=compute_35,code=sm_35 \
+CUDA8_GENCODE = -gencode=arch=compute_35,code=sm_35 \
-gencode=arch=compute_50,code=sm_50 \
-gencode=arch=compute_60,code=sm_60 \
-gencode=arch=compute_61,code=sm_61
@@ -46,7 +45,10 @@ endif
CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden
CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -std=c++11 -Wvla
CXXFLAGS += -I $(CUDA_INC)
-NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+# Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
+# 512 : 120, 640 : 96, 768 : 80, 1024 : 60
+# We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
+NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
# Use addprefix so that we can specify more than one path
NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt