diff options
author | Stefan Werner <stefan.werner@tangent-animation.com> | 2019-10-16 11:29:04 +0300 |
---|---|---|
committer | Stefan Werner <stefan.werner@tangent-animation.com> | 2019-10-16 11:29:04 +0300 |
commit | 35a545b752319833f8a231ea4ebbb46fc2650515 (patch) | |
tree | 398f85040d71b69a8e59153f3bf3e7120e8a2fdb /intern/cycles | |
parent | 76e8d2cc174315e3f71b561872e4fb88cfd75743 (diff) |
Cycles: Allow PTX targets for CUDA kernel build.
This is intended for developers on Windows primarily:
Now, CUDA architectures of type compute_xx are supported. This allows for quicker builds,
at the expense of the CUDA driver running ptxas the first time a kernel is loaded.
Differential Revision: https://developer.blender.org/D5953
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 24 |
2 files changed, 23 insertions, 7 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 00dd37f089c..b5e10b0c2cb 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -454,6 +454,12 @@ class CUDADevice : public Device { VLOG(1) << "Using precompiled kernel."; return cubin; } + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } } const string common_cflags = compile_kernel_get_common_cflags( diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index ea8aa197b6f..78da584e132 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) - set(cuda_cubin ${name}_${arch}.cubin) + if(${arch} MATCHES "compute_.*") + set(format "ptx") + else() + set(format "cubin") + endif() + set(cuda_file ${name}_${arch}.${format}) set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + if(${prev_arch} MATCHES "compute_.*") + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) + else() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + endif() endif() set(cuda_kernel_src "/kernels/cuda/${name}.cu") @@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES) -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) if(${experimental}) set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) @@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) + set(cuda_file ${cuda_cubin}) else() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} ${CUDA_NVCC_FLAGS} - --cubin + --${format} ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} --ptxas-options="-v" ${cuda_flags} DEPENDS ${kernel_sources}) endif() - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_file}) unset(cuda_debug_flags) endmacro() |