From 86c61ce64f6e8c921d8770fcd42ed2c21d01ca3a Mon Sep 17 00:00:00 2001 From: Ray Molenkamp Date: Thu, 26 Mar 2020 11:41:44 -0600 Subject: Cycles: Restore cycles_cubin_cc to working order Reviewed by: brecht pmoursnv Differential Revision: https://developer.blender.org/D7136 --- intern/cycles/CMakeLists.txt | 6 +-- intern/cycles/app/cycles_cubin_cc.cpp | 31 ++++++++---- intern/cycles/kernel/CMakeLists.txt | 75 ++++++++++++++++++++++-------- intern/cycles/kernel/kernel_compat_cuda.h | 7 ++- intern/cycles/kernel/kernel_compat_optix.h | 8 ++-- intern/cycles/util/util_static_assert.h | 4 +- 6 files changed, 91 insertions(+), 40 deletions(-) (limited to 'intern') diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 024a94ea437..121c8bdad6e 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -313,9 +313,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) set(MAX_MSVC 1910) elseif(${CUDA_VERSION} EQUAL "9.1") set(MAX_MSVC 1911) - elseif(${CUDA_VERSION} EQUAL "10.0") - set(MAX_MSVC 1999) - elseif(${CUDA_VERSION} EQUAL "10.1") + elseif(${CUDA_VERSION} LESS "11.0") set(MAX_MSVC 1999) endif() if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang") @@ -332,7 +330,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) endif() # NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC. -if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER) +if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE) if(NOT (${CUDA_VERSION} VERSION_LESS 10.0)) message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.") set(WITH_CYCLES_CUBIN_COMPILER OFF) diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp index 9d4d52b34ac..7631cb9bed5 100644 --- a/intern/cycles/app/cycles_cubin_cc.cpp +++ b/intern/cycles/app/cycles_cubin_cc.cpp @@ -43,7 +43,8 @@ template std::string to_string(const T &n) class CompilationSettings { public: - CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false) + CompilationSettings() + : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false) { } @@ -57,12 +58,13 @@ class CompilationSettings { int bits; bool verbose; bool fast_math; + bool ptx_only; }; static bool compile_cuda(CompilationSettings &settings) { - const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"}; - const char *header_content[] = {"\n", "\n", "\n", "\n"}; + const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"}; + const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"}; printf("Building %s\n", settings.input_file.c_str()); @@ -83,6 +85,8 @@ static bool compile_cuda(CompilationSettings &settings) options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion())); options.push_back("-arch=compute_" + std::to_string(settings.target_arch)); options.push_back("--device-as-default-execution-space"); + options.push_back("-DCYCLES_CUBIN_CC"); + options.push_back("--std=c++11"); if (settings.fast_math) options.push_back("--use_fast_math"); @@ -134,10 +138,14 @@ static bool compile_cuda(CompilationSettings &settings) fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result); return false; } - - /* Write a file in the temp folder with the ptx code. */ - settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + - OIIO::Filesystem::unique_path(); + if (settings.ptx_only) { + settings.ptx_file = settings.output_file; + } + else { + /* Write a file in the temp folder with the ptx code. */ + settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + + OIIO::Filesystem::unique_path(); + } FILE *f = fopen(settings.ptx_file.c_str(), "wb"); fwrite(&ptx_code[0], 1, ptx_size, f); fclose(f); @@ -249,6 +257,9 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s "-D %L", &settings.defines, "Add additional defines", + "-ptx", + &settings.ptx_only, + "emit PTX code", "-v", &settings.verbose, "Use verbose logging", @@ -303,8 +314,10 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } - if (!link_ptxas(settings)) { - exit(EXIT_FAILURE); + if (!settings.ptx_only) { + if (!link_ptxas(settings)) { + exit(EXIT_FAILURE); + } } return 0; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 0dd0da65f82..3264b5afea2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -452,7 +452,7 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUBIN_CC_ENV} "$" -target ${CUDA_ARCH} @@ -461,7 +461,6 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) - set(cuda_file ${cuda_cubin}) else() add_custom_command( OUTPUT ${cuda_file} @@ -517,7 +516,6 @@ if(WITH_CYCLES_DEVICE_OPTIX) -I "${OPTIX_INCLUDE_DIR}" -I "${CMAKE_CURRENT_SOURCE_DIR}/.." -I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda" - -arch=sm_30 --use_fast_math -o ${output}) @@ -525,25 +523,62 @@ if(WITH_CYCLES_DEVICE_OPTIX) set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) endif() + if(WITH_CYCLES_CUBIN_COMPILER) - add_custom_command( - OUTPUT - ${output} - DEPENDS - ${input} - ${SRC_HEADERS} - ${SRC_KERNELS_CUDA_HEADERS} - ${SRC_KERNELS_OPTIX_HEADERS} - ${SRC_BVH_HEADERS} - ${SRC_SVM_HEADERS} - ${SRC_GEOM_HEADERS} - ${SRC_CLOSURE_HEADERS} - ${SRC_UTIL_HEADERS} - COMMAND - ${CUDA_NVCC_EXECUTABLE} --ptx ${cuda_flags} ${input} - WORKING_DIRECTORY - "${CMAKE_CURRENT_SOURCE_DIR}") + # Needed to find libnvrtc-builtins.so. Can't do it from inside + # cycles_cubin_cc since the env variable is read before main() + if(APPLE) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + elseif(UNIX) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + add_custom_command( + OUTPUT ${output} + DEPENDS + ${input} + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPTIX_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + COMMAND ${CUBIN_CC_ENV} + "$" + -target 30 + -ptx + -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} + ${cuda_flags} + -v + -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + DEPENDS ${kernel_sources} cycles_cubin_cc) + else() + add_custom_command( + OUTPUT + ${output} + DEPENDS + ${input} + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPTIX_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + COMMAND + ${CUDA_NVCC_EXECUTABLE} + --ptx + -arch=sm_30 + ${cuda_flags} + ${input} + WORKING_DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}") + endif() list(APPEND optix_ptx ${output}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 4f508d7cdaa..3c5a10540d5 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -37,8 +37,11 @@ typedef unsigned long long uint64_t; typedef unsigned short half; typedef unsigned long long CUtexObject; -#define FLT_MIN 1.175494350822287507969e-38f -#define FLT_MAX 340282346638528859811704183484516925440.0f +#ifdef CYCLES_CUBIN_CC +# define FLT_MIN 1.175494350822287507969e-38f +# define FLT_MAX 340282346638528859811704183484516925440.0f +# define FLT_EPSILON 1.192092896e-07F +#endif __device__ half __float2half(const float f) { diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h index 61b9d87a020..7068acc3a32 100644 --- a/intern/cycles/kernel/kernel_compat_optix.h +++ b/intern/cycles/kernel/kernel_compat_optix.h @@ -35,9 +35,11 @@ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; typedef unsigned short half; typedef unsigned long long CUtexObject; - -#define FLT_MIN 1.175494350822287507969e-38f -#define FLT_MAX 340282346638528859811704183484516925440.0f +#ifdef CYCLES_CUBIN_CC +# define FLT_MIN 1.175494350822287507969e-38f +# define FLT_MAX 340282346638528859811704183484516925440.0f +# define FLT_EPSILON 1.192092896e-07F +#endif __device__ half __float2half(const float f) { diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index 2dca1d64a76..d809f2e06d7 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -24,9 +24,9 @@ CCL_NAMESPACE_BEGIN -#ifdef __KERNEL_OPENCL__ +#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC) # define static_assert(statement, message) -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_OPENCL__ */ #define static_assert_align(st, align) \ static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT -- cgit v1.2.3