From a9644c812fc17b38503828d6edf7d259b6fe0e74 Mon Sep 17 00:00:00 2001 From: Patrick Mours Date: Fri, 17 Jul 2020 15:06:55 +0200 Subject: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found This patch changes the discovery of pre-compiled kernels, to look for any PTX, even if it does not match the current architecture version exactly. It works because the driver can JIT-compile PTX generated for architectures less than or equal to the current one. This e.g. makes it possible to render on a new GPU architecture even if no pre-compiled binary kernel was distributed for it as part of the Blender installation. Reviewed By: brecht Differential Revision: https://developer.blender.org/D8332 --- intern/cycles/device/cuda/device_cuda_impl.cpp | 23 ++++++++++++++++++----- intern/cycles/kernel/CMakeLists.txt | 4 ++-- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 0be2c322dfa..3a2eb8df95b 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -383,11 +383,24 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu } } - const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; - if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; - return ptx; + /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */ + int ptx_major = major, ptx_minor = minor; + while (ptx_major >= 3) { + const string ptx = path_get( + string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } + + if (ptx_minor > 0) { + ptx_minor--; + } + else { + ptx_major--; + ptx_minor = 9; + } } } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 7cc0d32d521..5533eeb006d 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -539,7 +539,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) ${SRC_UTIL_HEADERS} COMMAND ${CUBIN_CC_ENV} "$" - -target 30 + -target 52 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -563,7 +563,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} --ptx - -arch=sm_30 + -arch=sm_52 ${cuda_flags} ${input} WORKING_DIRECTORY -- cgit v1.2.3