From a5052770b85fefe00511886429e6fc1f5056e1e8 Mon Sep 17 00:00:00 2001 From: Ray Molenkamp Date: Sat, 3 Feb 2018 10:59:09 -0700 Subject: cycles: Add an nvrtc based cubin cli compiler. nvcc is very picky regarding compiler versions, severely limiting the compiler we can use, this commit adds a nvrtc based compiler that'll allow us to build the cubins even if the host compiler is unsupported. for details see D2913. Differential Revision: http://developer.blender.org/D2913 --- intern/cycles/kernel/CMakeLists.txt | 90 +++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 39 deletions(-) (limited to 'intern/cycles/kernel/CMakeLists.txt') diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 8f7bc7996a4..3b76b3403e7 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -356,55 +356,67 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch name flags sources experimental) + set(cuda_cubin ${name}_${arch}.cubin) + set(cuda_kernel_src "/kernels/cuda/${name}.cu") + + set(cuda_flags + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D NVCC + -m ${CUDA_BITS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. 
+ -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda + --use_fast_math + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + if(${experimental}) - set(flags ${flags} -D__KERNEL_EXPERIMENTAL__) + set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) set(name ${name}_experimental) endif() - set(cuda_cubin ${name}_${arch}.cubin) - if(WITH_CYCLES_DEBUG) - set(cuda_debug_flags "-D__KERNEL_DEBUG__") - else() - set(cuda_debug_flags "") + set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) endif() - set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE}) - set(cuda_nvcc_version ${CUDA_VERSION}) - - set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") - set(cuda_math_flags "--use_fast_math") - - set(cuda_kernel_src "/kernels/cuda/${name}.cu") - - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${cuda_nvcc_command} - -arch=${arch} - ${CUDA_NVCC_FLAGS} - -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} - --ptxas-options="-v" - ${cuda_arch_flags} - ${cuda_version_flags} - ${cuda_math_flags} - ${flags} - ${cuda_debug_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/.. - -DCCL_NAMESPACE_BEGIN= - -DCCL_NAMESPACE_END= - -DNVCC - DEPENDS ${sources}) + if(WITH_CYCLES_CUBIN_COMPILER) + string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) + + # Needed to find libnvrtc-builtins.so. 
Can't do it from inside + # cycles_cubin_cc since the env variable is read before main() + if(APPLE) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + elseif(UNIX) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUBIN_CC_ENV} + "$<TARGET_FILE:cycles_cubin_cc>" + -target ${CUDA_ARCH} + -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + ${cuda_flags} + -v + -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + DEPENDS ${sources} cycles_cubin_cc) + else() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + ${CUDA_NVCC_FLAGS} + --cubin + ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + --ptxas-options="-v" + ${cuda_flags} + DEPENDS ${sources}) + endif() delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND cuda_cubins ${cuda_cubin}) - unset(cuda_extra_flags) unset(cuda_debug_flags) - - unset(cuda_nvcc_command) - unset(cuda_nvcc_version) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) @@ -412,12 +424,12 @@ if(WITH_CYCLES_CUDA_BINARIES) message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.") else() # Compile regular kernel - CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) # Compile split kernel - CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D__SPLIT__" ${cuda_sources} FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE) endif() endif() endforeach() -- cgit v1.2.3