diff options
Diffstat (limited to 'makefiles/common.mk')
-rw-r--r-- | makefiles/common.mk | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/makefiles/common.mk b/makefiles/common.mk index 2ad5c73..37e81be 100644 --- a/makefiles/common.mk +++ b/makefiles/common.mk @@ -25,8 +25,7 @@ CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2) # Better define NVCC_GENCODE in your environment to the minimal set # of archs to reduce compile time. -CUDA8_GENCODE = -gencode=arch=compute_30,code=sm_30 \ - -gencode=arch=compute_35,code=sm_35 \ +CUDA8_GENCODE = -gencode=arch=compute_35,code=sm_35 \ -gencode=arch=compute_50,code=sm_50 \ -gencode=arch=compute_60,code=sm_60 \ -gencode=arch=compute_61,code=sm_61 @@ -46,7 +45,10 @@ endif CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -std=c++11 -Wvla CXXFLAGS += -I $(CUDA_INC) -NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all +# Maxrregcount needs to be set according to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors) +# 512 : 120, 640 : 96, 768 : 80, 1024 : 60 +# We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions. +NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all # Use addprefix so that we can specify more than one path NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt |