diff options
-rw-r--r-- | extern/cuew/src/cuew.c | 68 | ||||
-rw-r--r-- | intern/cycles/CMakeLists.txt | 5 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_cuda.h | 12 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 150 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 25 |
6 files changed, 125 insertions, 137 deletions
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index a0146741494..f477ec48a18 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -683,23 +683,23 @@ static int cuewNvrtcInit(void) { int cuewInit(cuuint32_t flags) { - int result = CUEW_SUCCESS; - - if (flags & CUEW_INIT_CUDA) { - result = cuewCudaInit(); - if (result != CUEW_SUCCESS) { - return result; - } - } - - if (flags & CUEW_INIT_NVRTC) { - result = cuewNvrtcInit(); - if (result != CUEW_SUCCESS) { - return result; - } - } - - return result; + int result = CUEW_SUCCESS; + + if (flags & CUEW_INIT_CUDA) { + result = cuewCudaInit(); + if (result != CUEW_SUCCESS) { + return result; + } + } + + if (flags & CUEW_INIT_NVRTC) { + result = cuewNvrtcInit(); + if (result != CUEW_SUCCESS) { + return result; + } + } + + return result; } @@ -798,7 +798,10 @@ static int path_exists(const char *path) { const char *cuewCompilerPath(void) { #ifdef _WIN32 - const char *defaultpaths[] = {"C:/CUDA/bin", NULL}; + const char *defaultpaths[] = { + "C:/CUDA/bin", + "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin", + NULL}; const char *executable = "nvcc.exe"; #else const char *defaultpaths[] = { @@ -832,9 +835,12 @@ const char *cuewCompilerPath(void) { } } -#ifndef _WIN32 { +#ifdef _WIN32 + FILE *handle = popen("where nvcc", "r"); +#else FILE *handle = popen("which nvcc", "r"); +#endif if (handle) { char buffer[4096] = {0}; int len = fread(buffer, 1, sizeof(buffer) - 1, handle); @@ -845,7 +851,6 @@ const char *cuewCompilerPath(void) { } } } -#endif return NULL; } @@ -859,23 +864,6 @@ int cuewNvrtcVersion(void) { return 0; } -static size_t safe_strnlen(const char *s, size_t maxlen) { - size_t length; - for (length = 0; length < maxlen; s++, length++) { - if (*s == '\0') { - break; - } - } - return length; -} - -static char *safe_strncpy(char *dest, const char *src, size_t n) { - const size_t src_len = safe_strnlen(src, n - 1); - memcpy(dest, src, src_len); - dest[src_len] = '\0'; - return dest; -} - int cuewCompilerVersion(void) { const char *path = cuewCompilerPath(); const char *marker = "Cuda compilation tools, release "; @@ -891,8 +879,9 @@ int cuewCompilerVersion(void) { } /* get --version output */ - safe_strncpy(command, path, sizeof(command)); - strncat(command, " --version", sizeof(command) - strlen(path)); + strncat(command, "\"", 1); + strncat(command, path, sizeof(command) - 1); + strncat(command, "\" --version", sizeof(command) - strlen(path) - 1); pipe = popen(command, "r"); if (!pipe) { fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); @@ -922,4 +911,3 @@ int cuewCompilerVersion(void) { return 10 * major + minor; } - diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 6f6bd7ec2cc..1014831c403 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -228,11 +228,8 @@ if(WITH_CYCLES_DEVICE_OPTIX) SYSTEM ${OPTIX_INCLUDE_DIR} ) - - # Need pre-compiled CUDA binaries in the OptiX device - set(WITH_CYCLES_CUDA_BINARIES ON) else() - message(STATUS "Optix not found, disabling it from Cycles") + message(STATUS "OptiX not found, disabling it from Cycles") set(WITH_CYCLES_DEVICE_OPTIX OFF) endif() endif() diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h index 0f4543e6007..5820b525fd6 100644 --- a/intern/cycles/device/cuda/device_cuda.h +++ b/intern/cycles/device/cuda/device_cuda.h @@ -109,15 +109,13 @@ class CUDADevice : public Device { bool use_split_kernel(); - string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, - bool filter = false, - bool split = false); - - bool compile_check_compiler(); + virtual string compile_kernel_get_common_cflags( + const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false); string compile_kernel(const DeviceRequestedFeatures &requested_features, - bool filter = false, - bool split = false); + const char *name, + const char *base = "cuda", + bool force_ptx = false); virtual bool load_kernels(const DeviceRequestedFeatures &requested_features); diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index a4e1c026263..cd37c4dd407 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -329,70 +329,27 @@ string CUDADevice::compile_kernel_get_common_cflags( return cflags; } -bool CUDADevice::compile_check_compiler() -{ - const char *nvcc = cuewCompilerPath(); - if (nvcc == NULL) { - cuda_error_message( - "CUDA nvcc compiler not found. " - "Install CUDA toolkit in default location."); - return false; - } - const int cuda_version = cuewCompilerVersion(); - VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << "."; - const int major = cuda_version / 10, minor = cuda_version % 10; - if (cuda_version == 0) { - cuda_error_message("CUDA nvcc compiler version could not be parsed."); - return false; - } - if (cuda_version < 80) { - printf( - "Unsupported CUDA version %d.%d detected, " - "you need CUDA 8.0 or newer.\n", - major, - minor); - return false; - } - else if (cuda_version != 101) { - printf( - "CUDA version %d.%d detected, build may succeed but only " - "CUDA 10.1 is officially supported.\n", - major, - minor); - } - return true; -} - string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features, - bool filter, - bool split) + const char *name, + const char *base, + bool force_ptx) { - const char *name, *source; - if (filter) { - name = "filter"; - source = "filter.cu"; - } - else if (split) { - name = "kernel_split"; - source = "kernel_split.cu"; - } - else { - name = "kernel"; - source = "kernel.cu"; - } - /* Compute cubin name. */ + /* Compute kernel name. */ int major, minor; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); /* Attempt to use kernel provided with Blender. */ if (!use_adaptive_compilation()) { - const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; - if (path_exists(cubin)) { - VLOG(1) << "Using precompiled kernel."; - return cubin; + if (!force_ptx) { + const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; + if (path_exists(cubin)) { + VLOG(1) << "Using precompiled kernel."; + return cubin; + } } + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; if (path_exists(ptx)) { @@ -401,19 +358,21 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu } } - const string common_cflags = compile_kernel_get_common_cflags(requested_features, filter, split); - /* Try to use locally compiled kernel. */ - const string source_path = path_get("source"); - const string kernel_md5 = path_files_md5_hash(source_path); + string source_path = path_get("source"); + const string source_md5 = path_files_md5_hash(source_path); /* We include cflags into md5 so changing cuda toolkit or changing other * compiler command line arguments makes sure cubin gets re-built. */ - const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); + string common_cflags = compile_kernel_get_common_cflags( + requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL); + const string kernel_md5 = util_md5_string(source_md5 + common_cflags); + const char *const kernel_ext = force_ptx ? "ptx" : "cubin"; + const char *const kernel_arch = force_ptx ? "compute" : "sm"; const string cubin_file = string_printf( - "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str()); + "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext); const string cubin = path_cache_get(path_join("kernels", cubin_file)); VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; if (path_exists(cubin)) { @@ -422,7 +381,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu } # ifdef _WIN32 - if (have_precompiled_kernels()) { + if (!use_adaptive_compilation() && have_precompiled_kernels()) { if (major < 3) { cuda_error_message( string_printf("CUDA device requires compute capability 3.0 or up, " @@ -437,42 +396,69 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu major, minor)); } - return ""; + return string(); } # endif /* Compile. */ - if (!compile_check_compiler()) { - return ""; + const char *const nvcc = cuewCompilerPath(); + if (nvcc == NULL) { + cuda_error_message( + "CUDA nvcc compiler not found. " + "Install CUDA toolkit in default location."); + return string(); } - const char *nvcc = cuewCompilerPath(); - const string kernel = path_join(path_join(source_path, "kernel"), - path_join("kernels", path_join("cuda", source))); + + const int nvcc_cuda_version = cuewCompilerVersion(); + VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << "."; + if (nvcc_cuda_version < 80) { + printf( + "Unsupported CUDA version %d.%d detected, " + "you need CUDA 8.0 or newer.\n", + nvcc_cuda_version / 10, + nvcc_cuda_version % 10); + return string(); + } + else if (nvcc_cuda_version != 101) { + printf( + "CUDA version %d.%d detected, build may succeed but only " + "CUDA 10.1 is officially supported.\n", + nvcc_cuda_version / 10, + nvcc_cuda_version % 10); + } + double starttime = time_dt(); - printf("Compiling CUDA kernel ...\n"); path_create_directories(cubin); + source_path = path_join(path_join(source_path, "kernel"), + path_join("kernels", path_join(base, string_printf("%s.cu", name)))); + string command = string_printf( "\"%s\" " - "-arch=sm_%d%d " - "--cubin \"%s\" " + "-arch=%s_%d%d " + "--%s \"%s\" " "-o \"%s\" " - "%s ", + "%s", nvcc, + kernel_arch, major, minor, - kernel.c_str(), + kernel_ext, + source_path.c_str(), cubin.c_str(), common_cflags.c_str()); - printf("%s\n", command.c_str()); + printf("Compiling CUDA kernel ...\n%s\n", command.c_str()); - if (system(command.c_str()) == -1) { +#ifdef _WIN32 + command = "call " + command; +#endif + if (system(command.c_str()) != 0) { cuda_error_message( "Failed to execute compilation command, " "see console for details."); - return ""; + return string(); } /* Verify if compilation succeeded */ @@ -480,7 +466,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu cuda_error_message( "CUDA kernel compilation failed, " "see console for details."); - return ""; + return string(); } printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); @@ -509,12 +495,14 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features) return false; /* get kernel */ - string cubin = compile_kernel(requested_features, false, use_split_kernel()); - if (cubin == "") + const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel"; + string cubin = compile_kernel(requested_features, kernel_name); + if (cubin.empty()) return false; - string filter_cubin = compile_kernel(requested_features, true, false); - if (filter_cubin == "") + const char *filter_name = "filter"; + string filter_cubin = compile_kernel(requested_features, filter_name); + if (filter_cubin.empty()) return false; /* open module */ diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 5f991563738..6957adb478f 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -43,7 +43,6 @@ bool device_cuda_init() VLOG(1) << "Found precompiled kernels"; result = true; } -# ifndef _WIN32 else if (cuewCompilerPath() != NULL) { VLOG(1) << "Found CUDA compiler " << cuewCompilerPath(); result = true; @@ -52,7 +51,6 @@ bool device_cuda_init() VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found," << " unable to use CUDA"; } -# endif } else { VLOG(1) << "CUEW initialization failed: " diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 39110cc0959..2ce8bed3783 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -293,6 +293,23 @@ class OptiXDevice : public CUDADevice { return BVH_LAYOUT_OPTIX; } + string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, + bool filter, + bool /*split*/) override + { + // Split kernel is not supported in OptiX + string common_cflags = CUDADevice::compile_kernel_get_common_cflags( + requested_features, filter, false); + + // Add OptiX SDK include directory to include paths + const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR"); + if (optix_sdk_path) { + common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path); + } + + return common_cflags; + } + bool load_kernels(const DeviceRequestedFeatures &requested_features) override { if (have_error()) { @@ -367,9 +384,11 @@ class OptiXDevice : public CUDADevice { } { // Load and compile PTX module with OptiX kernels - string ptx_data; - const string ptx_filename = "lib/kernel_optix.ptx"; - if (!path_read_text(path_get(ptx_filename), ptx_data)) { + string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx"); + if (use_adaptive_compilation()) { + ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true); + } + if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { set_error("Failed loading OptiX kernel " + ptx_filename + "."); return false; } |