diff options
author | Patrick Mours <pmours@nvidia.com> | 2020-07-17 16:06:55 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-02-08 15:32:40 +0300 |
commit | 08aaa07adbd46e27f4226f29559be156f14a524b (patch) | |
tree | c1b123cadf013e9e9c9d42d0da8dc0be14643330 | |
parent | 510541563efa8f34e3ed6632e53aef31c3665a2f (diff) |
Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found
This patch changes the discovery of pre-compiled kernels to look for any PTX, even if
it does not match the current architecture version exactly. This works because the driver can
JIT-compile PTX generated for architectures less than or equal to the current one.
For example, this makes it possible to render on a new GPU architecture even if no pre-compiled
binary kernel was distributed for it as part of the Blender installation.
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D8332
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | build_files/cmake/config/blender_release.cmake | 2 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 4 |
4 files changed, 22 insertions, 9 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 83f547eb593..6f705ffbe44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 01a59e451aa..2d52fb22c86 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,7 +52,7 @@ set(WITH_USD ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE) set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE) # platform dependent options diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index ba5d479e0e7..870f9f9ecf9 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const 
DeviceRequestedFeatures &requested_featu } } - const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; - if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; - return ptx; + /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */ + int ptx_major = major, ptx_minor = minor; + while (ptx_major >= 3) { + const string ptx = path_get( + string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } + + if (ptx_minor > 0) { + ptx_minor--; + } + else { + ptx_major--; + ptx_minor = 9; + } } } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2e839a616e9..6ab0b9d39d2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) ${SRC_UTIL_HEADERS} COMMAND ${CUBIN_CC_ENV} "$<TARGET_FILE:cycles_cubin_cc>" - -target 30 + -target 52 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} --ptx - -arch=sm_30 + -arch=sm_52 ${cuda_flags} ${input} WORKING_DIRECTORY |