diff options
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | build_files/cmake/macros.cmake | 7 | ||||
-rw-r--r-- | build_files/cmake/platform/platform_win32.cmake | 6 | ||||
-rw-r--r-- | extern/cuew/include/cuew.h | 1 | ||||
-rw-r--r-- | extern/cuew/src/cuew.c | 11 | ||||
-rw-r--r-- | intern/cycles/CMakeLists.txt | 21 | ||||
-rw-r--r-- | intern/cycles/app/CMakeLists.txt | 24 | ||||
-rw-r--r-- | intern/cycles/app/cycles_cubin_cc.cpp | 284 | ||||
-rw-r--r-- | intern/cycles/bvh/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/cmake/macros.cmake | 12 | ||||
-rw-r--r-- | intern/cycles/device/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/graph/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 93 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/CMakeLists.txt | 4 | ||||
-rw-r--r-- | intern/cycles/render/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/subd/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 2 |
20 files changed, 448 insertions, 57 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index fb74675088e..650f5ec4ff2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -415,12 +415,14 @@ option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF) option(WITH_CYCLES_OSL "Build Cycles with OSL support" ${_init_CYCLES_OSL}) option(WITH_CYCLES_OPENSUBDIV "Build Cycles with OpenSubdiv support" ${_init_CYCLES_OPENSUBDIV}) option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF) +option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) option(WITH_CYCLES_DEBUG "Build Cycles with extra debug capabilities" OFF) option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF) +mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER) mark_as_advanced(WITH_CYCLES_LOGGING) mark_as_advanced(WITH_CYCLES_DEBUG) mark_as_advanced(WITH_CYCLES_NATIVE_ONLY) @@ -531,6 +533,9 @@ if(WIN32) set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.") mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD) + + option(WINDOWS_USE_VISUAL_STUDIO_FOLDERS "Organize the visual studio project according to source folders." ON) + mark_as_advanced(WINDOWS_USE_VISUAL_STUDIO_FOLDERS) endif() # avoid using again diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake index cf6a94303cf..6f014d7eb3b 100644 --- a/build_files/cmake/macros.cmake +++ b/build_files/cmake/macros.cmake @@ -242,6 +242,13 @@ function(blender_add_lib__impl # listed is helpful for IDE's (QtCreator/MSVC) blender_source_group("${sources}") + #if enabled, set the FOLDER property for visual studio projects + if(WINDOWS_USE_VISUAL_STUDIO_FOLDERS) + get_filename_component(FolderDir ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) + string(REPLACE ${CMAKE_SOURCE_DIR} "" FolderDir ${FolderDir}) + set_target_properties(${name} PROPERTIES FOLDER ${FolderDir}) + endif() + list_assert_duplicates("${sources}") list_assert_duplicates("${includes}") # Not for system includes because they can resolve to the same path diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake index 58c7a749bbd..7e978a71ad4 100644 --- a/build_files/cmake/platform/platform_win32.cmake +++ b/build_files/cmake/platform/platform_win32.cmake @@ -31,6 +31,12 @@ endif() # Libraries configuration for Windows when compiling with MSVC. +set_property(GLOBAL PROPERTY USE_FOLDERS ${WINDOWS_USE_VISUAL_STUDIO_FOLDERS}) + +if(NOT WITH_PYTHON_MODULE) + set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT blender) +endif() + macro(warn_hardcoded_paths package_name ) if(WITH_WINDOWS_FIND_MODULES) diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 0eace96bc3f..f5009d4f2c7 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -1323,6 +1323,7 @@ int cuewInit(void); const char *cuewErrorString(CUresult result); const char *cuewCompilerPath(void); int cuewCompilerVersion(void); +int cuewNvrtcVersion(void); #ifdef __cplusplus } diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index 962059bfcce..b68dc597049 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -329,7 +329,7 @@ int cuewInit(void) { #ifdef _WIN32 /* Expected in c:/windows/system or similar, no path needed. */ const char *cuda_paths[] = {"nvcuda.dll", NULL}; - const char *nvrtc_paths[] = {"nvrtc.dll", NULL}; + const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL}; #elif defined(__APPLE__) /* Default installation path. */ const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL}; @@ -766,6 +766,15 @@ const char *cuewCompilerPath(void) { return NULL; } +int cuewNvrtcVersion(void) { + int major, minor; + if (nvrtcVersion) { + nvrtcVersion(&major, &minor); + return 10 * major + minor; + } + return 0; +} + int cuewCompilerVersion(void) { const char *path = cuewCompilerPath(); const char *marker = "Cuda compilation tools, release "; diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 543fb5b2cf8..f720d389cbf 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -9,6 +9,7 @@ endif() # External Libraries include(cmake/external_libs.cmake) +include(cmake/macros.cmake) # Build Flags # todo: this code could be refactored a bit to avoid duplication @@ -242,6 +243,24 @@ if(CMAKE_COMPILER_IS_GNUCXX) unset(_has_no_error_unused_macros) endif() +if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) + if(MSVC) + set(MAX_MSVC 1800) + if(${CUDA_VERSION} EQUAL "8.0") + set(MAX_MSVC 1900) + elseif(${CUDA_VERSION} EQUAL "9.0") + set(MAX_MSVC 1910) + elseif(${CUDA_VERSION} EQUAL "9.1") + set(MAX_MSVC 1911) + endif() + if (NOT MSVC_VERSION LESS ${MAX_MSVC}) + message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.") + set(WITH_CYCLES_CUBIN_COMPILER ON) + endif() + unset(MAX_MSVC) + endif() +endif() + # Subdirectories @@ -254,7 +273,7 @@ if(WITH_CYCLES_NETWORK) add_definitions(-DWITH_NETWORK) endif() -if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK) +if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER) add_subdirectory(app) endif() diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index 08a3931ef46..9ebeceb1659 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -120,3 +120,27 @@ if(WITH_CYCLES_NETWORK) endif() unset(SRC) endif() + +if(WITH_CYCLES_CUBIN_COMPILER) + # 32 bit windows is special, nvrtc is not supported on x86, so even + # though we are building 32 bit blender a 64 bit cubin_cc will have + # to be build to compile the cubins. + if(MSVC AND NOT CMAKE_CL_64) + Message("cycles_cubin_cc not supported on x86") + else() + set(SRC cycles_cubin_cc.cpp) + set(INC ../../../extern/cuew/include) + add_executable(cycles_cubin_cc ${SRC}) + include_directories(${INC}) + target_link_libraries(cycles_cubin_cc + extern_cuew + ${OPENIMAGEIO_LIBRARIES} + ${PLATFORM_LINKLIBS} + ) + if(NOT CYCLES_STANDALONE_REPOSITORY) + target_link_libraries(cycles_cubin_cc bf_intern_guardedalloc) + endif() + unset(SRC) + unset(INC) + endif() +endif() diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp new file mode 100644 index 00000000000..c1f3974be6d --- /dev/null +++ b/intern/cycles/app/cycles_cubin_cc.cpp @@ -0,0 +1,284 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdint.h> + +#include <string> +#include <vector> + +#include <OpenImageIO/argparse.h> +#include <OpenImageIO/filesystem.h> + +#include "cuew.h" + +#ifdef _MSC_VER +# include <Windows.h> +#endif + +using std::string; +using std::vector; + +class CompilationSettings +{ +public: + CompilationSettings() + : target_arch(0), + bits(64), + verbose(false), + fast_math(false) + {} + + string cuda_toolkit_dir; + string input_file; + string output_file; + string ptx_file; + vector<string> defines; + vector<string> includes; + int target_arch; + int bits; + bool verbose; + bool fast_math; +}; + +bool compile_cuda(CompilationSettings &settings) +{ + const char* headers[] = {"stdlib.h" , "float.h", "math.h", "stdio.h"}; + const char* header_content[] = {"\n", "\n", "\n", "\n"}; + + printf("Building %s\n", settings.input_file.c_str()); + + string code; + if(!OIIO::Filesystem::read_text_file(settings.input_file, code)) { + fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str()); + return false; + } + + vector<string> options; + for(size_t i = 0; i < settings.includes.size(); i++) { + options.push_back("-I" + settings.includes[i]); + } + + for(size_t i = 0; i < settings.defines.size(); i++) { + options.push_back("-D" + settings.defines[i]); + } + + options.push_back("-arch=compute_" + std::to_string(settings.target_arch)); + options.push_back("--device-as-default-execution-space"); + if(settings.fast_math) + options.push_back("--use_fast_math"); + + nvrtcProgram prog; + nvrtcResult result = nvrtcCreateProgram(&prog, + code.c_str(), // buffer + NULL, // name + sizeof(headers) / sizeof(void*), // numHeaders + header_content, // headers + headers); // includeNames + + if(result != NVRTC_SUCCESS) { + fprintf(stderr, "Error: nvrtcCreateProgram failed (%x)\n\n", result); + return false; + } + + /* Tranfer options to a classic C array. */ + vector<const char*> opts(options.size()); + for(size_t i = 0; i < options.size(); i++) { + opts[i] = options[i].c_str(); + } + + result = nvrtcCompileProgram(prog, options.size(), &opts[0]); + + if(result != NVRTC_SUCCESS) { + fprintf(stderr, "Error: nvrtcCompileProgram failed (%x)\n\n", result); + + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + + vector<char> log(log_size); + nvrtcGetProgramLog(prog, &log[0]); + fprintf(stderr, "%s\n", &log[0]); + + return false; + } + + /* Retrieve the ptx code. */ + size_t ptx_size; + result = nvrtcGetPTXSize(prog, &ptx_size); + if(result != NVRTC_SUCCESS) { + fprintf(stderr, "Error: nvrtcGetPTXSize failed (%x)\n\n", result); + return false; + } + + vector<char> ptx_code(ptx_size); + result = nvrtcGetPTX(prog, &ptx_code[0]); + if(result != NVRTC_SUCCESS) { + fprintf(stderr, "Error: nvrtcGetPTX failed (%x)\n\n", result); + return false; + } + + /* Write a file in the temp folder with the ptx code. */ + settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + OIIO::Filesystem::unique_path(); + FILE * f= fopen(settings.ptx_file.c_str(), "wb"); + fwrite(&ptx_code[0], 1, ptx_size, f); + fclose(f); + + return true; +} + +bool link_ptxas(CompilationSettings &settings) +{ + string cudapath = ""; + if(settings.cuda_toolkit_dir.size()) + cudapath = settings.cuda_toolkit_dir + "/bin/"; + + string ptx = "\"" +cudapath + "ptxas\" " + settings.ptx_file + + " -o " + settings.output_file + + " --gpu-name sm_" + std::to_string(settings.target_arch) + + " -m" + std::to_string(settings.bits); + + if(settings.verbose) + ptx += " --verbose"; + + int pxresult = system(ptx.c_str()); + if(pxresult) { + fprintf(stderr, "Error: ptxas failed (%x)\n\n", pxresult); + return false; + } + + if(!OIIO::Filesystem::remove(settings.ptx_file)) { + fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str()); + } + + return true; +} + +bool init(CompilationSettings &settings) +{ +#ifdef _MSC_VER + if(settings.cuda_toolkit_dir.size()) { + SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str()); + } +#endif + + int cuewresult = cuewInit(); + if(cuewresult != CUEW_SUCCESS) { + fprintf(stderr, "Error: cuew init fialed (0x%x)\n\n", cuewresult); + return false; + } + + if(cuewNvrtcVersion() < 80) { + fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion()); + return false; + } + + if(!nvrtcCreateProgram) { + fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n"); + return false; + } + + if(!nvrtcCompileProgram) { + fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n"); + return false; + } + + if(!nvrtcGetProgramLogSize) { + fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n"); + return false; + } + + if(!nvrtcGetProgramLog) { + fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n"); + return false; + } + + if(!nvrtcGetPTXSize) { + fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n"); + return false; + } + + if(!nvrtcGetPTX) { + fprintf(stderr, "Error: nvrtcGetPTX not resolved\n"); + return false; + } + + return true; +} + +bool parse_parameters(int argc, const char **argv, CompilationSettings &settings) +{ + OIIO::ArgParse ap; + ap.options("Usage: cycles_cubin_cc [options]", + "-target %d", &settings.target_arch, "target shader model", + "-m %d", &settings.bits, "Cuda architecture bits", + "-i %s", &settings.input_file, "Input source filename", + "-o %s", &settings.output_file, "Output cubin filename", + "-I %L", &settings.includes, "Add additional includepath", + "-D %L", &settings.defines, "Add additional defines", + "-v", &settings.verbose, "Use verbose logging", + "--use_fast_math", &settings.fast_math, "Use fast math", + "-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory", + NULL); + + if(ap.parse(argc, argv) < 0) { + fprintf(stderr, "%s\n", ap.geterror().c_str()); + ap.usage(); + return false; + } + + if(!settings.output_file.size()) { + fprintf(stderr, "Error: Output file not set(-o), required\n\n"); + return false; + } + + if(!settings.input_file.size()) { + fprintf(stderr, "Error: Input file not set(-i, required\n\n"); + return false; + } + + if(!settings.target_arch) { + fprintf(stderr, "Error: target shader model not set (-target), required\n\n"); + return false; + } + + return true; +} + +int main(int argc, const char **argv) +{ + CompilationSettings settings; + + if(!parse_parameters(argc, argv, settings)) { + fprintf(stderr, "Error: invalid parameters, exiting\n"); + exit(EXIT_FAILURE); + } + + if(!init(settings)) { + fprintf(stderr, "Error: initialization error, exiting\n"); + exit(EXIT_FAILURE); + } + + if(!compile_cuda(settings)) { + fprintf(stderr, "Error: compilation error, exiting\n"); + exit(EXIT_FAILURE); + } + + if(!link_ptxas(settings)) { + exit(EXIT_FAILURE); + } + + return 0; +} diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index 6078db5a8ca..b8171e7f70d 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -34,4 +34,4 @@ set(SRC_HEADERS include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -add_library(cycles_bvh ${SRC} ${SRC_HEADERS}) +cycles_add_library(cycles_bvh ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/cmake/macros.cmake b/intern/cycles/cmake/macros.cmake new file mode 100644 index 00000000000..f3ca06ac6b8 --- /dev/null +++ b/intern/cycles/cmake/macros.cmake @@ -0,0 +1,12 @@ +function(cycles_set_solution_folder target) + if(WINDOWS_USE_VISUAL_STUDIO_FOLDERS) + get_filename_component(folderdir ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) + string(REPLACE ${CMAKE_SOURCE_DIR} "" folderdir ${folderdir}) + set_target_properties(${target} PROPERTIES FOLDER ${folderdir}) + endif() +endfunction() + +macro(cycles_add_library target) + add_library(${target} ${ARGN}) + cycles_set_solution_folder(${target}) +endmacro() diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 959c0aa97c9..75e78e038ea 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -77,4 +77,4 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -add_library(cycles_device ${SRC} ${SRC_OPENCL} ${SRC_HEADERS}) +cycles_add_library(cycles_device ${SRC} ${SRC_OPENCL} ${SRC_HEADERS}) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index a7d20c67b6d..24ae79e5074 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -348,7 +348,6 @@ public: const DeviceRequestedFeatures& requested_features, bool filter=false, bool split=false) { - const int cuda_version = cuewCompilerVersion(); const int machine = system_cpu_bits(); const string source_path = path_get("source"); const string include_path = source_path; @@ -356,10 +355,8 @@ public: "--ptxas-options=\"-v\" " "--use_fast_math " "-DNVCC " - "-D__KERNEL_CUDA_VERSION__=%d " "-I\"%s\"", machine, - cuda_version, include_path.c_str()); if(!filter && use_adaptive_compilation()) { cflags += " " + requested_features.get_build_options(); diff --git a/intern/cycles/graph/CMakeLists.txt b/intern/cycles/graph/CMakeLists.txt index e70a18137bd..168ca0210e7 100644 --- a/intern/cycles/graph/CMakeLists.txt +++ b/intern/cycles/graph/CMakeLists.txt @@ -19,5 +19,5 @@ set(SRC_HEADERS include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -add_library(cycles_graph ${SRC} ${SRC_HEADERS}) +cycles_add_library(cycles_graph ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 8f7bc7996a4..d981b67559e 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -356,55 +356,67 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch name flags sources experimental) + set(cuda_cubin ${name}_${arch}.cubin) + set(cuda_kernel_src "/kernels/cuda/${name}.cu") + + set(cuda_flags + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D NVCC + -m ${CUDA_BITS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. + -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda + --use_fast_math + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + if(${experimental}) - set(flags ${flags} -D__KERNEL_EXPERIMENTAL__) + set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) set(name ${name}_experimental) endif() - set(cuda_cubin ${name}_${arch}.cubin) - if(WITH_CYCLES_DEBUG) - set(cuda_debug_flags "-D__KERNEL_DEBUG__") - else() - set(cuda_debug_flags "") + set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) endif() - set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE}) - set(cuda_nvcc_version ${CUDA_VERSION}) - - set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") - set(cuda_math_flags "--use_fast_math") - - set(cuda_kernel_src "/kernels/cuda/${name}.cu") - - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${cuda_nvcc_command} - -arch=${arch} - ${CUDA_NVCC_FLAGS} - -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} - --ptxas-options="-v" - ${cuda_arch_flags} - ${cuda_version_flags} - ${cuda_math_flags} - ${flags} - ${cuda_debug_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/.. - -DCCL_NAMESPACE_BEGIN= - -DCCL_NAMESPACE_END= - -DNVCC - DEPENDS ${sources}) + if(WITH_CYCLES_CUBIN_COMPILER) + string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) + + # Needed to find libnvrtc-builtins.so. Can't do it from inside + # cycles_cubin_cc since the env variable is read before main() + if(APPLE) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + elseif(UNIX) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUBIN_CC_ENV} + "$<TARGET_FILE:cycles_cubin_cc>" + -target ${CUDA_ARCH} + -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + ${cuda_flags} + -v + -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + DEPENDS ${sources} cycles_cubin_cc) + else() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + ${CUDA_NVCC_FLAGS} + --cubin + ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + --ptxas-options="-v" + ${cuda_flags} + DEPENDS ${sources}) + endif() delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND cuda_cubins ${cuda_cubin}) - unset(cuda_extra_flags) unset(cuda_debug_flags) - - unset(cuda_nvcc_command) - unset(cuda_nvcc_version) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) @@ -412,17 +424,18 @@ if(WITH_CYCLES_CUDA_BINARIES) message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.") else() # Compile regular kernel - CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) # Compile split kernel - CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D__SPLIT__" ${cuda_sources} FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE) endif() endif() endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) + cycles_set_solution_folder(cycles_kernel_cuda) endif() # OSL module @@ -465,7 +478,7 @@ if(CXX_HAS_AVX2) set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() -add_library(cycles_kernel +cycles_add_library(cycles_kernel ${SRC_CPU_KERNELS} ${SRC_CUDA_KERNELS} ${SRC_OPENCL_KERNELS} diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index fa512f80e41..7b66bdc169e 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -30,10 +30,22 @@ # define __NODES_FEATURES__ NODE_FEATURE_ALL #endif -#include <cuda.h> -#include <cuda_fp16.h> -#include <float.h> -#include <stdint.h> +/* Manual definitions so we can compile without CUDA toolkit. */ + +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef unsigned short half; +typedef unsigned long long CUtexObject; + +#define FLT_MAX 1.175494350822287507969e-38f +#define FLT_MIN 340282346638528859811704183484516925440.0f + +__device__ half __float2half(const float f) +{ + half val; + asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); + return val; +} /* Qualifier wrappers for different names on different devices */ diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index d2eb89e0e0a..159de63a044 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -30,5 +30,5 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -add_library(cycles_kernel_osl ${SRC} ${HEADER_SRC}) +cycles_add_library(cycles_kernel_osl ${SRC} ${HEADER_SRC}) diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index 5e8e98773d9..19b7769200e 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -104,6 +104,7 @@ set(SRC_OSO # TODO, add a module to compile OSL foreach(_file ${SRC_OSL}) set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file}) + set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE) string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE}) string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE}) add_custom_command( @@ -118,7 +119,8 @@ foreach(_file ${SRC_OSL}) unset(_OSO_FILE) endforeach() -add_custom_target(cycles_osl_shaders ALL DEPENDS ${SRC_OSO} ${SRC_OSL_HEADERS} ${OSL_COMPILER}) +add_custom_target(cycles_osl_shaders ALL DEPENDS ${SRC_OSO} ${SRC_OSL_HEADERS} ${OSL_COMPILER} SOURCES ${SRC_OSL}) +cycles_set_solution_folder(cycles_osl_shaders) # CMAKE_CURRENT_SOURCE_DIR is already included in OSO paths delayed_install("" "${SRC_OSO}" ${CYCLES_INSTALL_PATH}/shader) diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 17ac66644e2..580d76e7777 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -71,4 +71,4 @@ include_directories(SYSTEM ${INC_SYS}) add_definitions(${GL_DEFINITIONS}) -add_library(cycles_render ${SRC} ${SRC_HEADERS}) +cycles_add_library(cycles_render ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/subd/CMakeLists.txt b/intern/cycles/subd/CMakeLists.txt index fe0c221ab0d..7f952dd43ce 100644 --- a/intern/cycles/subd/CMakeLists.txt +++ b/intern/cycles/subd/CMakeLists.txt @@ -24,4 +24,4 @@ set(SRC_HEADERS include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -add_library(cycles_subd ${SRC} ${SRC_HEADERS}) +cycles_add_library(cycles_subd ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index bc9def7ca41..66c4f22a7e2 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -127,4 +127,4 @@ include_directories(SYSTEM ${INC_SYS}) add_definitions(${GL_DEFINITIONS}) -add_library(cycles_util ${SRC} ${SRC_HEADERS}) +cycles_add_library(cycles_util ${SRC} ${SRC_HEADERS}) |