diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-cuda-glibc211-i686.py | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-mac-i386.py | 2 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-mac-x86_64.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/darwin-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/linux-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win32-mingw-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win32-vc-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win64-mingw-config.py | 2 | ||||
-rw-r--r-- | build_files/scons/config/win64-vc-config.py | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 28 | ||||
-rw-r--r-- | intern/cycles/kernel/SConscript | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 2 |
15 files changed, 33 insertions, 25 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index e2dda8553d6..acd01f43fcd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,7 +264,7 @@ option(WITH_CYCLES "Enable cycles Render Engine" ON) option(WITH_CYCLES_TEST "Build cycles test application" OFF) option(WITH_CYCLES_OSL "Build Cycles with OSL support" OFF) option(WITH_CYCLES_CUDA_BINARIES "Build cycles CUDA binaries" OFF) -set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py index 0e56c4326f9..69053d7ff39 100644 --- a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py +++ b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py @@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686' BF_INSTALLDIR = '../blender-install/linux-glibc211-i686' BF_NUMJOBS = 1 -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py index 59725522e18..c9b765f55ac 100644 --- a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py +++ b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py @@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64' BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64' BF_NUMJOBS = 1 -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] diff --git a/build_files/buildbot/config/user-config-mac-i386.py b/build_files/buildbot/config/user-config-mac-i386.py index e7b1b688100..60f8b5705ac 100644 --- a/build_files/buildbot/config/user-config-mac-i386.py +++ b/build_files/buildbot/config/user-config-mac-i386.py @@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' WITH_BF_CYCLES_CUDA_BINARIES = True BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] #Freestyle WITH_BF_FREESTYLE = True diff --git a/build_files/buildbot/config/user-config-mac-x86_64.py b/build_files/buildbot/config/user-config-mac-x86_64.py index f5f6bf84796..4fab8d6d566 100644 --- a/build_files/buildbot/config/user-config-mac-x86_64.py +++ b/build_files/buildbot/config/user-config-mac-x86_64.py @@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' WITH_BF_CYCLES_CUDA_BINARIES = True BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] #Freestyle WITH_BF_FREESTYLE = True diff --git a/build_files/scons/config/darwin-config.py b/build_files/scons/config/darwin-config.py index 0e413711210..f7192e3dd81 100644 --- a/build_files/scons/config/darwin-config.py +++ b/build_files/scons/config/darwin-config.py @@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] #Freestyle WITH_BF_FREESTYLE = True diff --git a/build_files/scons/config/linux-config.py b/build_files/scons/config/linux-config.py index 418d2ef41b8..6106142f8a9 100644 --- a/build_files/scons/config/linux-config.py +++ b/build_files/scons/config/linux-config.py @@ -210,7 +210,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc' -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] WITH_BF_OPENMP = True diff --git a/build_files/scons/config/win32-mingw-config.py b/build_files/scons/config/win32-mingw-config.py index 5d3c285f530..d71feb8d2e9 100644 --- a/build_files/scons/config/win32-mingw-config.py +++ b/build_files/scons/config/win32-mingw-config.py @@ -149,7 +149,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada' WITH_BF_CYCLES = True WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] WITH_BF_OIIO = True BF_OIIO = LIBDIR + '/openimageio' diff --git a/build_files/scons/config/win32-vc-config.py b/build_files/scons/config/win32-vc-config.py index bceab628b47..101d0402757 100644 --- a/build_files/scons/config/win32-vc-config.py +++ b/build_files/scons/config/win32-vc-config.py @@ -215,7 +215,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' #CUDA WITH_BF_CYCLES_CUDA_BINARIES = False #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] #Ray trace optimization WITH_BF_RAYOPTIMIZATION = True diff --git a/build_files/scons/config/win64-mingw-config.py b/build_files/scons/config/win64-mingw-config.py index b817f24da11..e82b6e8a73f 100644 --- a/build_files/scons/config/win64-mingw-config.py +++ b/build_files/scons/config/win64-mingw-config.py @@ -146,7 +146,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada' WITH_BF_CYCLES = True WITH_BF_CYCLES_CUDA_BINARIES = False BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] WITH_BF_OIIO = True BF_OIIO = LIBDIR + '/openimageio' diff --git a/build_files/scons/config/win64-vc-config.py b/build_files/scons/config/win64-vc-config.py index c7bf2fa0bd2..803cac144ab 100644 --- a/build_files/scons/config/win64-vc-config.py +++ b/build_files/scons/config/win64-vc-config.py @@ -212,7 +212,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib' #CUDA WITH_BF_CYCLES_CUDA_BINARIES = False #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler -BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30'] +BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35'] #Ray trace optimization WITH_BF_RAYOPTIMIZATION = True diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 1f96ed0ae83..27c54af5153 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -304,7 +304,7 @@ public: } } else { - /* CUDA 4.x */ + /* CUDA 5.x */ if(major == 1) { /* sm_1x */ arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 5e9dd15b812..2fa1393e935 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -130,6 +130,12 @@ if(WITH_CYCLES_CUDA_BINARIES) string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") + # warn for other versions + if(CUDA_VERSION MATCHES "50") + else() + message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported") + endif() + # build for each arch set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) @@ -139,12 +145,6 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") - # warn for other versions - if(CUDA_VERSION MATCHES "50") - else() - message(STATUS "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported") - endif() - # build flags depending on CUDA version and arch if(CUDA_VERSION LESS 50) # CUDA 4.x @@ -176,13 +176,17 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_math_flags "--use_fast_math") endif() - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC - DEPENDS ${cuda_sources}) + if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") + message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") + else() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC + DEPENDS ${cuda_sources}) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_cubin}) + endif() endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 353ec1ce9d8..6459c3ed183 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -88,6 +88,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: # build flags depending on CUDA version and arch if cuda_version < 50: + if arch == "sm_35": + print("Can't build kernel for CUDA sm_35 architecture, skipping") + continue + # CUDA 4.x if arch.startswith("sm_1"): # sm_1x diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 4fd1e9d8807..a67c55acf3d 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -517,7 +517,7 @@ __device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t& #else -/* somewhat slower version for SSE3 */ +/* somewhat slower version for SSE2 */ typedef int shuffle_swap_t; __device_inline const shuffle_swap_t shuffle_swap_identity(void) |