diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-cuda-glibc211-i686.py | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/darwin-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/linux-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win32-mingw-config.py | 4 | ||||
-rw-r--r-- | build_files/scons/config/win32-vc-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win64-mingw-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win64-vc-config.py | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/SConscript | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cu | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 2 |
14 files changed, 28 insertions, 22 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index cb104149d33..53dcb0ce4ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,7 +264,7 @@ option(WITH_CYCLES_STANDALONE "Build cycles standalone application" OFF) option(WITH_CYCLES_STANDALONE_GUI "Build cycles standalone with GUI" OFF) option(WITH_CYCLES_OSL "Build Cycles with OSL support" OFF) option(WITH_CYCLES_CUDA_BINARIES "Build cycles CUDA binaries" OFF) -set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_50 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py index 69053d7ff39..854f535398b 100644 --- a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py +++ b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py @@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686' BF_INSTALLDIR = '../blender-install/linux-glibc211-i686' BF_NUMJOBS = 1 -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py index c9b765f55ac..7e928948762 100644 --- a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py +++ b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py @@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64' BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64' BF_NUMJOBS = 1 -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] diff --git a/build_files/scons/config/darwin-config.py b/build_files/scons/config/darwin-config.py index 2f77c6be6e9..aac7ed4fa04 100644 --- a/build_files/scons/config/darwin-config.py +++ b/build_files/scons/config/darwin-config.py @@ -199,7 +199,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] #Freestyle WITH_BF_FREESTYLE = True diff --git a/build_files/scons/config/linux-config.py b/build_files/scons/config/linux-config.py index ce2d07f782c..8f2c5ca30f4 100644 --- a/build_files/scons/config/linux-config.py +++ b/build_files/scons/config/linux-config.py @@ -206,7 +206,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] WITH_BF_OPENMP = True diff --git a/build_files/scons/config/win32-mingw-config.py b/build_files/scons/config/win32-mingw-config.py index 3a5a02fb385..a6d1a7d7996 100644 --- a/build_files/scons/config/win32-mingw-config.py +++ b/build_files/scons/config/win32-mingw-config.py @@ -145,7 +145,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada' WITH_BF_CYCLES = True WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] WITH_BF_OIIO = True BF_OIIO = LIBDIR + '/openimageio' @@ -175,7 +175,7 @@ WITH_BF_OPENMP = True #CUDA WITH_BF_CYCLES_CUDA_BINARIES = False #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] #Freestyle WITH_BF_FREESTYLE = True diff --git a/build_files/scons/config/win32-vc-config.py b/build_files/scons/config/win32-vc-config.py index bb9bcc310ab..16b105d188d 100644 --- a/build_files/scons/config/win32-vc-config.py +++ b/build_files/scons/config/win32-vc-config.py @@ -226,7 +226,7 @@ WITH_BF_CYCLES_CUDA_BINARIES = False if VC_VERSION == '11.0': BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] else: - BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] + BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] #Ray trace optimization WITH_BF_RAYOPTIMIZATION = True diff --git a/build_files/scons/config/win64-mingw-config.py b/build_files/scons/config/win64-mingw-config.py index 6efbf5b7197..dcdea6583d7 100644 --- a/build_files/scons/config/win64-mingw-config.py +++ b/build_files/scons/config/win64-mingw-config.py @@ -144,7 +144,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada' WITH_BF_CYCLES = True WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] WITH_BF_OIIO = True BF_OIIO = LIBDIR + '/openimageio' diff --git a/build_files/scons/config/win64-vc-config.py b/build_files/scons/config/win64-vc-config.py index 9c32972572c..3ec284ed34a 100644 --- a/build_files/scons/config/win64-vc-config.py +++ b/build_files/scons/config/win64-vc-config.py @@ -224,7 +224,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' #CUDA WITH_BF_CYCLES_CUDA_BINARIES = False #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50'] #Ray trace optimization WITH_BF_RAYOPTIMIZATION = True diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0d2f6cdfe19..72453e83864 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -222,7 +222,7 @@ public: /* In order to use full 6GB of memory on Titan cards, use arrays instead * of textures. On earlier cards this seems slower, but on Titan it is * actually slightly faster in tests. */ - use_texture_storage = (cuDevArchitecture < 350); + use_texture_storage = (cuDevArchitecture < 300); cuda_pop_context(); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 1527d154c86..d18f4fa2998 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -146,11 +146,11 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "50") + if(CUDA_VERSION MATCHES "60") else() message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " - "build may succeed but only CUDA 5.0 is officially supported") + "build may succeed but only CUDA 6.0 is officially supported") endif() # build for each arch @@ -162,8 +162,10 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") - - if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") + + if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50") + message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping") + elseif(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") else() add_custom_command( diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 5316ec9926b..04e1bad7538 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -69,8 +69,8 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0] cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1]) - if cuda_version != 50: - print("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported." % (cuda_version/10, cuda_version%10)) + if cuda_version != 60: + print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10)) # nvcc flags nvcc_flags = "-m%s" % (bits) @@ -85,6 +85,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: # add command for each cuda architecture for arch in cuda_archs: + if cuda_version < 60 and arch == "sm_50": + print("Can't build kernel for CUDA sm_50 architecture, skipping") + continue + cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) if env['BF_CYCLES_CUDA_ENV']: diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index d91c6d97123..636e48b5456 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -49,8 +49,8 @@ /* tunable parameters */ #define CUDA_THREADS_BLOCK_WIDTH 16 -#define CUDA_KERNEL_MAX_REGISTERS 32 -#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 /* 5.0 */ #elif __CUDA_ARCH__ == 500 @@ -61,8 +61,8 @@ /* tunable parameters */ #define CUDA_THREADS_BLOCK_WIDTH 16 -#define CUDA_KERNEL_MAX_REGISTERS 32 -#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 /* unknown architecture */ #else diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 15e7353ec38..e4c20d26ff1 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -60,7 +60,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; /* In order to use full 6GB of memory on Titan cards, use arrays instead * of textures. On earlier cards this seems slower, but on Titan it is * actually slightly faster in tests. */ -#if __CUDA_ARCH__ < 350 +#if __CUDA_ARCH__ < 300 #define __KERNEL_CUDA_TEX_STORAGE__ #endif |