From 35a545b752319833f8a231ea4ebbb46fc2650515 Mon Sep 17 00:00:00 2001 From: Stefan Werner Date: Wed, 16 Oct 2019 10:29:04 +0200 Subject: Cycles: Allow PTX targets for CUDA kernel build. This is intended for developers on Windows primarily: Now, CUDA architectures of type compute_xx are supported. This allows for quicker builds, at the expense of the CUDA driver running ptxas the first time a kernel is loaded. Differential Revision: https://developer.blender.org/D5953 --- intern/cycles/device/device_cuda.cpp | 6 ++++++ intern/cycles/kernel/CMakeLists.txt | 24 +++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 00dd37f089c..b5e10b0c2cb 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -454,6 +454,12 @@ class CUDADevice : public Device { VLOG(1) << "Using precompiled kernel."; return cubin; } + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } } const string common_cflags = compile_kernel_get_common_cflags( diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index ea8aa197b6f..78da584e132 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) - set(cuda_cubin ${name}_${arch}.cubin) + if(${arch} MATCHES "compute_.*") + set(format "ptx") + else() + set(format "cubin") + endif() + set(cuda_file ${name}_${arch}.${format}) set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + if(${prev_arch} MATCHES "compute_.*") + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) + else() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + endif() endif() set(cuda_kernel_src "/kernels/cuda/${name}.cu") @@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES) -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) if(${experimental}) set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) @@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) + set(cuda_file ${cuda_cubin}) else() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} ${CUDA_NVCC_FLAGS} - --cubin + --${format} ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} --ptxas-options="-v" ${cuda_flags} DEPENDS ${kernel_sources}) endif() - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_file}) unset(cuda_debug_flags) endmacro() -- cgit v1.2.3