Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorRay Molenkamp <github@lazydodo.com>2020-03-26 20:41:44 +0300
committerRay Molenkamp <github@lazydodo.com>2020-03-26 20:41:44 +0300
commit86c61ce64f6e8c921d8770fcd42ed2c21d01ca3a (patch)
treee13f1941567bffbc159063a6891431ee9b158bac /intern
parent58ea0d93f193adf84162d736c3c69500584e1aef (diff)
Cycles: Restore cycles_cubin_cc to working order
Reviewed by: brecht pmoursnv Differential Revision: https://developer.blender.org/D7136
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/CMakeLists.txt6
-rw-r--r--intern/cycles/app/cycles_cubin_cc.cpp31
-rw-r--r--intern/cycles/kernel/CMakeLists.txt75
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h7
-rw-r--r--intern/cycles/kernel/kernel_compat_optix.h8
-rw-r--r--intern/cycles/util/util_static_assert.h4
6 files changed, 91 insertions, 40 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 024a94ea437..121c8bdad6e 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -313,9 +313,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911)
- elseif(${CUDA_VERSION} EQUAL "10.0")
- set(MAX_MSVC 1999)
- elseif(${CUDA_VERSION} EQUAL "10.1")
+ elseif(${CUDA_VERSION} LESS "11.0")
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
@@ -332,7 +330,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
-if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
+if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF)
diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp
index 9d4d52b34ac..7631cb9bed5 100644
--- a/intern/cycles/app/cycles_cubin_cc.cpp
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -43,7 +43,8 @@ template<typename T> std::string to_string(const T &n)
class CompilationSettings {
public:
- CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
+ CompilationSettings()
+ : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
{
}
@@ -57,12 +58,13 @@ class CompilationSettings {
int bits;
bool verbose;
bool fast_math;
+ bool ptx_only;
};
static bool compile_cuda(CompilationSettings &settings)
{
- const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
- const char *header_content[] = {"\n", "\n", "\n", "\n"};
+ const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
+ const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
printf("Building %s\n", settings.input_file.c_str());
@@ -83,6 +85,8 @@ static bool compile_cuda(CompilationSettings &settings)
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
options.push_back("--device-as-default-execution-space");
+ options.push_back("-DCYCLES_CUBIN_CC");
+ options.push_back("--std=c++11");
if (settings.fast_math)
options.push_back("--use_fast_math");
@@ -134,10 +138,14 @@ static bool compile_cuda(CompilationSettings &settings)
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
return false;
}
-
- /* Write a file in the temp folder with the ptx code. */
- settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
- OIIO::Filesystem::unique_path();
+ if (settings.ptx_only) {
+ settings.ptx_file = settings.output_file;
+ }
+ else {
+ /* Write a file in the temp folder with the ptx code. */
+ settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
+ OIIO::Filesystem::unique_path();
+ }
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
@@ -249,6 +257,9 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s
"-D %L",
&settings.defines,
"Add additional defines",
+ "-ptx",
+ &settings.ptx_only,
+ "emit PTX code",
"-v",
&settings.verbose,
"Use verbose logging",
@@ -303,8 +314,10 @@ int main(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- if (!link_ptxas(settings)) {
- exit(EXIT_FAILURE);
+ if (!settings.ptx_only) {
+ if (!link_ptxas(settings)) {
+ exit(EXIT_FAILURE);
+ }
}
return 0;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 0dd0da65f82..3264b5afea2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -452,7 +452,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
add_custom_command(
- OUTPUT ${cuda_cubin}
+ OUTPUT ${cuda_file}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH}
@@ -461,7 +461,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
-v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
- set(cuda_file ${cuda_cubin})
else()
add_custom_command(
OUTPUT ${cuda_file}
@@ -517,7 +516,6 @@ if(WITH_CYCLES_DEVICE_OPTIX)
-I "${OPTIX_INCLUDE_DIR}"
-I "${CMAKE_CURRENT_SOURCE_DIR}/.."
-I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda"
- -arch=sm_30
--use_fast_math
-o ${output})
@@ -525,25 +523,62 @@ if(WITH_CYCLES_DEVICE_OPTIX)
set(cuda_flags ${cuda_flags}
-D __KERNEL_DEBUG__)
endif()
+ if(WITH_CYCLES_CUBIN_COMPILER)
- add_custom_command(
- OUTPUT
- ${output}
- DEPENDS
- ${input}
- ${SRC_HEADERS}
- ${SRC_KERNELS_CUDA_HEADERS}
- ${SRC_KERNELS_OPTIX_HEADERS}
- ${SRC_BVH_HEADERS}
- ${SRC_SVM_HEADERS}
- ${SRC_GEOM_HEADERS}
- ${SRC_CLOSURE_HEADERS}
- ${SRC_UTIL_HEADERS}
- COMMAND
- ${CUDA_NVCC_EXECUTABLE} --ptx ${cuda_flags} ${input}
- WORKING_DIRECTORY
- "${CMAKE_CURRENT_SOURCE_DIR}")
+ # Needed to find libnvrtc-builtins.so. Can't do it from inside
+ # cycles_cubin_cc since the env variable is read before main()
+ if(APPLE)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
+ elseif(UNIX)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+ endif()
+ add_custom_command(
+ OUTPUT ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND ${CUBIN_CC_ENV}
+ "$<TARGET_FILE:cycles_cubin_cc>"
+ -target 30
+ -ptx
+ -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
+ ${cuda_flags}
+ -v
+ -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+ DEPENDS ${kernel_sources} cycles_cubin_cc)
+ else()
+ add_custom_command(
+ OUTPUT
+ ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND
+ ${CUDA_NVCC_EXECUTABLE}
+ --ptx
+ -arch=sm_30
+ ${cuda_flags}
+ ${input}
+ WORKING_DIRECTORY
+ "${CMAKE_CURRENT_SOURCE_DIR}")
+ endif()
list(APPEND optix_ptx ${output})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 4f508d7cdaa..3c5a10540d5 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -37,8 +37,11 @@ typedef unsigned long long uint64_t;
typedef unsigned short half;
typedef unsigned long long CUtexObject;
-#define FLT_MIN 1.175494350822287507969e-38f
-#define FLT_MAX 340282346638528859811704183484516925440.0f
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
__device__ half __float2half(const float f)
{
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
index 61b9d87a020..7068acc3a32 100644
--- a/intern/cycles/kernel/kernel_compat_optix.h
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -35,9 +35,11 @@ typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef unsigned short half;
typedef unsigned long long CUtexObject;
-
-#define FLT_MIN 1.175494350822287507969e-38f
-#define FLT_MAX 340282346638528859811704183484516925440.0f
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
__device__ half __float2half(const float f)
{
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index 2dca1d64a76..d809f2e06d7 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -24,9 +24,9 @@
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_OPENCL__
+#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC)
# define static_assert(statement, message)
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_OPENCL__ */
#define static_assert_align(st, align) \
static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT