From b14ec1860127eddf9a46132c4877bd4a7ee385a2 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 4 Dec 2018 12:34:59 +0100 Subject: Cycles: add initial CUDA 10.0 support, but only recommend use for Turing cards. There may still be rendering errors when used for older graphics cards. --- CMakeLists.txt | 2 +- build_files/buildbot/slave_compile.py | 1 - build_files/cmake/config/blender_release.cmake | 2 +- extern/cuew/src/cuew.c | 6 +++++- intern/cycles/CMakeLists.txt | 7 +++++++ intern/cycles/kernel/CMakeLists.txt | 6 ++++-- intern/cycles/kernel/kernels/cuda/kernel_config.h | 18 +++++++++++++++--- 7 files changed, 33 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7359151d9a3..1912b8f6a34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -408,7 +408,7 @@ option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_72 sm_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/buildbot/slave_compile.py b/build_files/buildbot/slave_compile.py index d5ec5630e39..375357670d7 100644 --- a/build_files/buildbot/slave_compile.py +++ b/build_files/buildbot/slave_compile.py @@ -126,7 +126,6 @@ if 'cmake' in builder: # Prepare CMake options needed to configure cuda binaries compilation, 64bit only. 
if bits == 64: cuda_cmake_options.append("-DWITH_CYCLES_CUDA_BINARIES=%s" % ('ON' if build_cubins else 'OFF')) - cuda_cmake_options.append("-DCYCLES_CUDA_BINARIES_ARCH=sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70") if build_cubins or 'cuda' in targets: cuda_cmake_options.append("-DCUDA_64_BIT_DEVICE_CODE=ON") diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 23d6b55b9c0..a43470f4e1e 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -54,7 +54,7 @@ set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE) set(WITH_PLAYER ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_72;sm_75 CACHE STRING "" FORCE) # platform dependent options if(UNIX AND NOT APPLE) diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index ad216e66452..a22f6fda570 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -619,7 +619,11 @@ static int cuewNvrtcInit(void) { /* Library paths. */ #ifdef _WIN32 /* Expected in c:/windows/system or similar, no path needed. */ - const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL}; + const char *nvrtc_paths[] = {"nvrtc64_80.dll", + "nvrtc64_90.dll", + "nvrtc64_91.dll", + "nvrtc64_100_0.dll", + NULL}; #elif defined(__APPLE__) /* Default installation path. 
*/ const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL}; diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 3028b7c67d3..8f0d838e881 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -315,6 +315,13 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) endif() endif() +# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC. +if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER) + if(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0) + message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.") + set(WITH_CYCLES_CUBIN_COMPILER OFF) + endif() +endif() # Subdirectories diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d4145225b77..3ba43a9e0bd 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -343,11 +343,11 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91") + if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91" OR CUDA_VERSION MATCHES "100") else() message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " - "build may succeed but only CUDA 9.0 and 9.1 are officially supported") + "build may succeed but only CUDA 9.0, 9.1 and 10.0 are officially supported") endif() # build for each arch @@ -442,6 +442,8 @@ if(WITH_CYCLES_CUDA_BINARIES) foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) if(${arch} MATCHES "sm_2.") message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") + elseif(${arch} MATCHES "sm_7." 
AND (${CUDA_VERSION} LESS 100) AND (NOT DEFINED CUDA10_NVCC_EXECUTABLE)) + message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.") else() # Compile regular kernel CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE) diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index 3808898c5ca..6d41dc15785 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -52,8 +52,8 @@ # define CUDA_KERNEL_MAX_REGISTERS 63 # define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 -/* 5.0, 5.2, 5.3, 6.0, 6.1 */ -#elif __CUDA_ARCH__ >= 500 +/* 5.x, 6.x */ +#elif __CUDA_ARCH__ <= 699 # define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 # define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 # define CUDA_BLOCK_MAX_THREADS 1024 @@ -62,13 +62,25 @@ /* tunable parameters */ # define CUDA_THREADS_BLOCK_WIDTH 16 /* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */ -# if __CUDACC_VER_MAJOR__ == 9 && __CUDA_ARCH__ >= 600 +# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600 # define CUDA_KERNEL_MAX_REGISTERS 64 # else # define CUDA_KERNEL_MAX_REGISTERS 48 # endif # define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 +/* 7.x */ +#elif __CUDA_ARCH__ <= 799 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 255 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 64 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72 + /* unknown architecture */ #else -- cgit v1.2.3 From f63da3dcf59f87b34aa916b2c65ce5a40a48fd92 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 4 Dec 2018 11:49:57 +0100 Subject: Buildbot: enable support for NVIDIA Turing cards in Cycles (like GTX 20xx). 
We currently only build the sm_7x kernels with CUDA 10.0, older cards still use 9.1 until rendering errors are solved for them. --- build_files/buildbot/slave_compile.py | 20 +++++++++++++++++--- intern/cycles/kernel/CMakeLists.txt | 22 +++++++++++++++++----- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/build_files/buildbot/slave_compile.py b/build_files/buildbot/slave_compile.py index 375357670d7..c282e3624c2 100644 --- a/build_files/buildbot/slave_compile.py +++ b/build_files/buildbot/slave_compile.py @@ -73,9 +73,6 @@ if 'cmake' in builder: if builder.endswith('x86_64_10_9_cmake'): cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64') cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.9') - # Used to trick CUDFA to see CLang as an older version. - # cmake_extra_options.append('-DCUDA_HOST_COMPILER=/usr/local/cuda-hack/clang') - # cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/nvcc') elif builder.startswith('win'): if builder.endswith('_vs2017'): @@ -121,6 +118,23 @@ if 'cmake' in builder: cmake_extra_options.extend(["-DCMAKE_C_COMPILER=/usr/bin/gcc-7", "-DCMAKE_CXX_COMPILER=/usr/bin/g++-7"]) + # Workaround to build only sm_7x kernels with CUDA 10, until + # older kernels work well with this version. 
+ if builder.startswith('win'): + cmake_extra_options.append('-DCUDA_VERSION=9.1') + cmake_extra_options.append('-DCUDA_TOOLKIT_INCLUDE:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/include') + cmake_extra_options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1') + cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/bin/nvcc.exe') + cmake_extra_options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/nvcc.exe') + cmake_extra_options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0') + elif builder.startswith('linux'): + cmake_extra_options.append('-DCUDA_VERSION=9.1') + cmake_extra_options.append('-DCUDA_TOOLKIT_INCLUDE:PATH=/usr/local/cuda-9.1/include') + cmake_extra_options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-9.1') + cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-9.1/bin/nvcc') + cmake_extra_options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.0/bin/nvcc') + cmake_extra_options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.0') + cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file)) # Prepare CMake options needed to configure cuda binaries compilation, 64bit only. diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 3ba43a9e0bd..73ff7aeda15 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -397,17 +397,29 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) endif() - if(WITH_CYCLES_CUBIN_COMPILER) + # Workaround to build only sm_7x kernels with CUDA 10, until + # older kernels work well with this version. 
+ if(DEFINED CUDA10_NVCC_EXECUTABLE AND (${arch} MATCHES "sm_7.")) + set(with_cubin_compiler OFF) + set(cuda_nvcc_executable "${CUDA10_NVCC_EXECUTABLE}") + set(cuda_toolkit_root_dir "${CUDA10_TOOLKIT_ROOT_DIR}") + else() + set(with_cubin_compiler ${WITH_CYCLES_CUBIN_COMPILER}) + set(cuda_nvcc_executable "${CUDA_NVCC_EXECUTABLE}") + set(cuda_toolkit_root_dir "${CUDA_TOOLKIT_ROOT_DIR}") + endif() + + if(with_cubin_compiler) string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) # Needed to find libnvrtc-builtins.so. Can't do it from inside # cycles_cubin_cc since the env variable is read before main() if(APPLE) set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + -E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib") elseif(UNIX) set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + -E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64") endif() add_custom_command( @@ -418,12 +430,12 @@ if(WITH_CYCLES_CUDA_BINARIES) -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} ${cuda_flags} -v - -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + -cuda-toolkit-dir "${cuda_toolkit_root_dir}" DEPENDS ${kernel_sources} cycles_cubin_cc) else() add_custom_command( OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} + COMMAND ${cuda_nvcc_executable} -arch=${arch} ${CUDA_NVCC_FLAGS} --cubin -- cgit v1.2.3