diff options
author | Patrick Mours <pmours@nvidia.com> | 2020-06-12 17:42:49 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2020-06-12 19:24:15 +0300 |
commit | b586f801fc921f9f420260fb3ff4f26cb6773157 (patch) | |
tree | 8370736aec591a2424c09aa35863a4ba3f92dae4 /intern | |
parent | 5dca72dfc924ff931ae46b35a6342beec87f9fc4 (diff) |
Cycles: Improve CUDA and OptiX error reporting in the viewport
This patch makes the infamous "Cancel" error in the viewport a thing of the past. Instead, the
viewport now shows a more useful error message, and the error handling process in CUDA is streamlined.
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D8008
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/cuda/device_cuda.h | 6 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 122 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 18 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 4 |
4 files changed, 65 insertions, 85 deletions
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h index 9f31ed12cf4..1aa2fdd0967 100644 --- a/intern/cycles/device/cuda/device_cuda.h +++ b/intern/cycles/device/cuda/device_cuda.h @@ -100,11 +100,7 @@ class CUDADevice : public Device { virtual BVHLayoutMask get_bvh_layout_mask() const; - void cuda_error_documentation(); - - bool cuda_error_(CUresult result, const string &stmt); - - void cuda_error_message(const string &message); + void set_error(const string &error) override; CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_); diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 64c7f5e7d34..7aa63ff48c3 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -135,8 +135,10 @@ BVHLayoutMask CUDADevice::get_bvh_layout_mask() const return BVH_LAYOUT_BVH2; } -void CUDADevice::cuda_error_documentation() +void CUDADevice::set_error(const string &error) { + Device::set_error(error); + if (first_error) { fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); fprintf(stderr, @@ -148,42 +150,13 @@ void CUDADevice::cuda_error_documentation() # define cuda_assert(stmt) \ { \ CUresult result = stmt; \ -\ if (result != CUDA_SUCCESS) { \ - string message = string_printf( \ - "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \ - if (error_msg == "") \ - error_msg = message; \ - fprintf(stderr, "%s\n", message.c_str()); \ - /*cuda_abort();*/ \ - cuda_error_documentation(); \ + const char *name = cuewErrorString(result); \ + set_error(string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \ } \ } \ (void)0 -bool CUDADevice::cuda_error_(CUresult result, const string &stmt) -{ - if (result == CUDA_SUCCESS) - return false; - - string message = string_printf("CUDA error at %s: %s", stmt.c_str(), 
cuewErrorString(result)); - if (error_msg == "") - error_msg = message; - fprintf(stderr, "%s\n", message.c_str()); - cuda_error_documentation(); - return true; -} - -# define cuda_error(stmt) cuda_error_(stmt, # stmt) - -void CUDADevice::cuda_error_message(const string &message) -{ - if (error_msg == "") - error_msg = message; - fprintf(stderr, "%s\n", message.c_str()); - cuda_error_documentation(); -} - CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL) { @@ -212,12 +185,19 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool functions.loaded = false; /* Intialize CUDA. */ - if (cuda_error(cuInit(0))) + CUresult result = cuInit(0); + if (result != CUDA_SUCCESS) { + set_error(string_printf("Failed to initialize CUDA runtime (%s)", cuewErrorString(result))); return; + } /* Setup device and context. */ - if (cuda_error(cuDeviceGet(&cuDevice, cuDevId))) + result = cuDeviceGet(&cuDevice, cuDevId); + if (result != CUDA_SUCCESS) { + set_error(string_printf("Failed to get CUDA device handle from ordinal (%s)", + cuewErrorString(result))); return; + } /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, @@ -235,8 +215,6 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool } /* Create context. 
*/ - CUresult result; - if (background) { result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); } @@ -249,8 +227,10 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool } } - if (cuda_error_(result, "cuCtxCreate")) + if (result != CUDA_SUCCESS) { + set_error(string_printf("Failed to create CUDA context (%s)", cuewErrorString(result))); return; + } int major, minor; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); @@ -280,10 +260,8 @@ bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_feat /* We only support sm_30 and above */ if (major < 3) { - cuda_error_message( - string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", - major, - minor)); + set_error(string_printf( + "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor)); return false; } @@ -319,13 +297,19 @@ bool CUDADevice::check_peer_access(Device *peer_device) // Enable peer access in both directions { const CUDAContextScope scope(this); - if (cuda_error(cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0))) { + CUresult result = cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0); + if (result != CUDA_SUCCESS) { + set_error(string_printf("Failed to enable peer access on CUDA context (%s)", + cuewErrorString(result))); return false; } } { const CUDAContextScope scope(peer_device_cuda); - if (cuda_error(cuCtxEnablePeerAccess(cuContext, 0))) { + CUresult result = cuCtxEnablePeerAccess(cuContext, 0); + if (result != CUDA_SUCCESS) { + set_error(string_printf("Failed to enable peer access on CUDA context (%s)", + cuewErrorString(result))); return false; } } @@ -432,14 +416,14 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu # ifdef _WIN32 if (!use_adaptive_compilation() && have_precompiled_kernels()) { if (major < 3) { - cuda_error_message( - string_printf("CUDA device requires compute capability 3.0 or up, " - "found 
%d.%d. Your GPU is not supported.", + set_error( + string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. " + "Your GPU is not supported.", major, minor)); } else { - cuda_error_message( + set_error( string_printf("CUDA binary kernel for this graphics card compute " "capability (%d.%d) not found.", major, @@ -452,7 +436,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu /* Compile. */ const char *const nvcc = cuewCompilerPath(); if (nvcc == NULL) { - cuda_error_message( + set_error( "CUDA nvcc compiler not found. " "Install CUDA toolkit in default location."); return string(); @@ -504,7 +488,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu command = "call " + command; # endif if (system(command.c_str()) != 0) { - cuda_error_message( + set_error( "Failed to execute compilation command, " "see console for details."); return string(); @@ -512,7 +496,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu /* Verify if compilation succeeded */ if (!path_exists(cubin)) { - cuda_error_message( + set_error( "CUDA kernel compilation failed, " "see console for details."); return string(); @@ -565,16 +549,19 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features) else result = CUDA_ERROR_FILE_NOT_FOUND; - if (cuda_error_(result, "cuModuleLoad")) - cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); + if (result != CUDA_SUCCESS) + set_error(string_printf( + "Failed to load CUDA kernel from '%s' (%s)", cubin.c_str(), cuewErrorString(result))); if (path_read_text(filter_cubin, cubin_data)) result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str()); else result = CUDA_ERROR_FILE_NOT_FOUND; - if (cuda_error_(result, "cuModuleLoad")) - cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); + if (result != CUDA_SUCCESS) + set_error(string_printf("Failed to 
load CUDA kernel from '%s' (%s)", + filter_cubin.c_str(), + cuewErrorString(result))); if (result == CUDA_SUCCESS) { reserve_local_memory(requested_features); @@ -870,7 +857,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ if (mem_alloc_result != CUDA_SUCCESS) { status = " failed, out of device and host memory"; - cuda_assert(mem_alloc_result); + set_error("System is out of GPU and shared host memory"); } if (mem.name) { @@ -2458,14 +2445,10 @@ void CUDADevice::task_cancel() # define cuda_assert(stmt) \ { \ CUresult result = stmt; \ -\ if (result != CUDA_SUCCESS) { \ - string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ - if (device->error_msg == "") \ - device->error_msg = message; \ - fprintf(stderr, "%s\n", message.c_str()); \ - /*cuda_abort();*/ \ - device->cuda_error_documentation(); \ + const char *name = cuewErrorString(result); \ + device->set_error( \ + string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \ } \ } \ (void)0 @@ -2647,14 +2630,15 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name, const DeviceRequestedFeatures &) { - CUDAContextScope scope(device); - CUfunction func; + const CUDAContextScope scope(device); - cuda_assert( - cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); - if (device->have_error()) { - device->cuda_error_message( - string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); + CUfunction func; + const CUresult result = cuModuleGetFunction( + &func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()); + if (result != CUDA_SUCCESS) { + device->set_error(string_printf("Could not find kernel \"kernel_cuda_%s\" in module (%s)", + kernel_name.data(), + cuewErrorString(result))); return NULL; } diff --git 
a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index db04c13d083..fbf6a914744 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -70,7 +70,7 @@ struct KernelParams { if (res != CUDA_SUCCESS) { \ const char *name; \ cuGetErrorName(res, &name); \ - set_error(string_printf("OptiX CUDA error %s in %s, line %d", name, #stmt, __LINE__)); \ + set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \ return; \ } \ } \ @@ -81,7 +81,7 @@ struct KernelParams { if (res != CUDA_SUCCESS) { \ const char *name; \ cuGetErrorName(res, &name); \ - set_error(string_printf("OptiX CUDA error %s in %s, line %d", name, #stmt, __LINE__)); \ + set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \ return false; \ } \ } \ @@ -92,7 +92,7 @@ struct KernelParams { enum OptixResult res = stmt; \ if (res != OPTIX_SUCCESS) { \ const char *name = optixGetErrorName(res); \ - set_error(string_printf("OptiX error %s in %s, line %d", name, #stmt, __LINE__)); \ + set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \ return; \ } \ } \ @@ -102,7 +102,7 @@ struct KernelParams { enum OptixResult res = stmt; \ if (res != OPTIX_SUCCESS) { \ const char *name = optixGetErrorName(res); \ - set_error(string_printf("OptiX error %s in %s, line %d", name, #stmt, __LINE__)); \ + set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \ return false; \ } \ } \ @@ -322,12 +322,12 @@ class OptiXDevice : public CUDADevice { // Disable baking for now, since its kernel is not well-suited for inlining and is very slow if (requested_features.use_baking) { - set_error("OptiX implementation does not support baking yet"); + set_error("OptiX backend does not support baking yet"); return false; } // Disable shader raytracing support for now, since continuation callables are slow if (requested_features.use_shader_raytrace) { - 
set_error("OptiX implementation does not support shader raytracing yet"); + set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet"); return false; } @@ -386,14 +386,14 @@ class OptiXDevice : public CUDADevice { if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { if (!getenv("OPTIX_ROOT_DIR")) { set_error( - "OPTIX_ROOT_DIR environment variable not set, must be set with the path to the " - "Optix SDK in order to compile the Optix kernel on demand."); + "Missing OPTIX_ROOT_DIR environment variable (which must be set with the path to " + "the Optix SDK to be able to compile Optix kernels on demand)."); return false; } ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true); } if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { - set_error("Failed loading OptiX kernel " + ptx_filename + "."); + set_error("Failed to load OptiX kernel from '" + ptx_filename + "'"); return false; } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 7c50140ecfe..f5bfebbaf78 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -833,7 +833,7 @@ bool Session::load_kernels(bool lock_scene) message = "Failed loading render kernel, see console for errors"; progress.set_error(message); - progress.set_status("Error", message); + progress.set_status(message); progress.set_update(); return false; } @@ -872,7 +872,7 @@ void Session::run() /* progress update */ if (progress.get_cancel()) - progress.set_status("Cancel", progress.get_cancel_message()); + progress.set_status(progress.get_cancel_message()); else progress.set_update(); } |