Merged PR 10999: Splitting up add_all.h into *.h, *.cu and *.inc

Splitting the header file into a header and *.cu files comes at the price of having to include specializations for combinations of types, as in element.inc and add.inc. No code changes otherwise. Also adds CMake options to disable specific compute capabilities. When run with `make -j16`, this compiles in about 6 minutes instead of 7 minutes. Selecting only SM70 during compilation brings the time down to 3 minutes.
author: Martin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2020-01-06 22:14:00 +0300
committer: Martin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2020-01-06 22:14:00 +0300
commit: 164d26cc36204316324a2fc76f7538a044dd192a (patch)
tree: 630189bdeca50fe62c1b76b4e9c6b3e7ba24fca5 /CMakeLists.txt
parent: 88d998058917648510ebe66bf764a418ed1d3a8f (diff)
1 files changed, 17 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab5460e6..c442931f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,6 +13,10 @@ set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
 # Custom CMake options
 option(COMPILE_CPU "Compile CPU version" ON)
 option(COMPILE_CUDA "Compile GPU version" ON)
+option(COMPILE_CUDA_SM35 "Compile GPU version with SM35 support" ON)
+option(COMPILE_CUDA_SM50 "Compile GPU version with SM50 support" ON)
+option(COMPILE_CUDA_SM60 "Compile GPU version with SM60 support" ON)
+option(COMPILE_CUDA_SM70 "Compile GPU version with SM70 support" ON)
 option(COMPILE_EXAMPLES "Compile examples" OFF)
 option(COMPILE_SERVER "Compile marian-server" OFF)
 option(COMPILE_TESTS "Compile tests" OFF)
@@ -181,8 +185,6 @@ set(EXT_LIBS ${EXT_LIBS} ${CMAKE_DL_LIBS})
 
 if(COMPILE_CUDA)
 
-LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61;)
-
 if(USE_STATIC_LIBS)
   # link statically to stdlib libraries
   set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")
@@ -202,10 +204,19 @@ if(CUDA_FOUND)
   if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2"))
       message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}")
   endif()
-
-  if(CUDA_VERSION VERSION_GREATER "8.0")
-    LIST(APPEND COMPUTE -gencode=arch=compute_70,code=sm_70; -gencode=arch=compute_70,code=compute_70)
-  endif()
+  
+  if(COMPILE_CUDA_SM35)
+    LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35;)                             # Tesla K40 and above
+  endif(COMPILE_CUDA_SM35)
+  if(COMPILE_CUDA_SM50)
+    LIST(APPEND COMPUTE -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52;)     # Maxwell GPUs
+  endif(COMPILE_CUDA_SM50)
+  if(COMPILE_CUDA_SM60)
+    LIST(APPEND COMPUTE -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61;)     # Pascal GPUs
+  endif(COMPILE_CUDA_SM60)
+  if(COMPILE_CUDA_SM70 AND CUDA_VERSION VERSION_GREATER "8.0")  # Volta GPUs; nvcc from CUDA <= 8.0 rejects compute_70
+    LIST(APPEND COMPUTE -gencode=arch=compute_70,code=sm_70; -gencode=arch=compute_70,code=compute_70)
+  endif()
 
   if(USE_STATIC_LIBS)
     find_library(CUDA_culibos_LIBRARY NAMES culibos PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
author	Martin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>	2020-01-06 22:14:00 +0300
committer	Martin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>	2020-01-06 22:14:00 +0300
commit	164d26cc36204316324a2fc76f7538a044dd192a (patch)
tree	630189bdeca50fe62c1b76b4e9c6b3e7ba24fca5 /CMakeLists.txt
parent	88d998058917648510ebe66bf764a418ed1d3a8f (diff)