diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-09-27 15:47:51 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-09-27 16:24:12 +0300 |
commit | a6b53ef99492267f8f27fd58ea35104b88e1bec8 (patch) | |
tree | 651dbdd6ced4f35f78540b83b2a1acc454c7aae0 /intern | |
parent | 2bd020521578549eb47c58c7984c9a35b7c35cd8 (diff) |
Cycles: print name of kernels on errors in CUDA queue, for debugging
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/cuda/queue.cpp | 54 | ||||
-rw-r--r-- | intern/cycles/device/cuda/queue.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_queue.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/device/device_queue.h | 2 |
4 files changed, 47 insertions, 25 deletions
diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index b7f86c10553..1149a835b14 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -116,18 +116,18 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar } /* Launch kernel. */ - cuda_device_assert(cuda_device_, - cuLaunchKernel(cuda_kernel.function, - num_blocks, - 1, - 1, - num_threads_per_block, - 1, - 1, - shared_mem_bytes, - cuda_stream_, - args, - 0)); + assert_success(cuLaunchKernel(cuda_kernel.function, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + shared_mem_bytes, + cuda_stream_, + args, + 0), + "enqueue"); return !(cuda_device_->have_error()); } @@ -139,7 +139,8 @@ bool CUDADeviceQueue::synchronize() } const CUDAContextScope scope(cuda_device_); - cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_)); + assert_success(cuStreamSynchronize(cuda_stream_), "synchronize"); + debug_synchronize(); return !(cuda_device_->have_error()); @@ -162,9 +163,9 @@ void CUDADeviceQueue::zero_to_device(device_memory &mem) assert(mem.device_pointer != 0); const CUDAContextScope scope(cuda_device_); - cuda_device_assert( - cuda_device_, - cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_)); + assert_success( + cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_), + "zero_to_device"); } void CUDADeviceQueue::copy_to_device(device_memory &mem) @@ -185,10 +186,10 @@ void CUDADeviceQueue::copy_to_device(device_memory &mem) /* Copy memory to device. */ const CUDAContextScope scope(cuda_device_); - cuda_device_assert( - cuda_device_, + assert_success( cuMemcpyHtoDAsync( - (CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_)); + (CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_), + "copy_to_device"); } void CUDADeviceQueue::copy_from_device(device_memory &mem) @@ -204,10 +205,19 @@ void CUDADeviceQueue::copy_from_device(device_memory &mem) /* Copy memory from device. */ const CUDAContextScope scope(cuda_device_); - cuda_device_assert( - cuda_device_, + assert_success( cuMemcpyDtoHAsync( - mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_)); + mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_), + "copy_from_device"); +} + +void CUDADeviceQueue::assert_success(CUresult result, const char *operation) +{ + if (result != CUDA_SUCCESS) { + const char *name = cuewErrorString(result); + cuda_device_->set_error(string_printf( + "%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str())); + } } unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create() diff --git a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h index 62e3aa3d6c2..4d1995ed69e 100644 --- a/intern/cycles/device/cuda/queue.h +++ b/intern/cycles/device/cuda/queue.h @@ -60,6 +60,8 @@ class CUDADeviceQueue : public DeviceQueue { protected: CUDADevice *cuda_device_; CUstream cuda_stream_; + + void assert_success(CUresult result, const char *operation); }; CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_queue.cpp b/intern/cycles/device/device_queue.cpp index a89ba68d62c..f2b2f3496e0 100644 --- a/intern/cycles/device/device_queue.cpp +++ b/intern/cycles/device/device_queue.cpp @@ -57,8 +57,9 @@ void DeviceQueue::debug_init_execution() { if (VLOG_IS_ON(3)) { last_sync_time_ = time_dt(); - last_kernels_enqueued_ = 0; } + + last_kernels_enqueued_ = 0; } void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) @@ -66,8 +67,9 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) if (VLOG_IS_ON(3)) { VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size " << work_size; - last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel); } + + last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel); } void DeviceQueue::debug_synchronize() @@ -80,8 +82,14 @@ void DeviceQueue::debug_synchronize() stats_kernel_time_[last_kernels_enqueued_] += elapsed_time; last_sync_time_ = new_time; - last_kernels_enqueued_ = 0; } + + last_kernels_enqueued_ = 0; +} + +string DeviceQueue::debug_active_kernels() +{ + return device_kernel_mask_as_string(last_kernels_enqueued_); } CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_queue.h b/intern/cycles/device/device_queue.h index edda3e61d51..e6835b787cf 100644 --- a/intern/cycles/device/device_queue.h +++ b/intern/cycles/device/device_queue.h @@ -21,6 +21,7 @@ #include "device/device_graphics_interop.h" #include "util/util_logging.h" #include "util/util_map.h" +#include "util/util_string.h" #include "util/util_unique_ptr.h" CCL_NAMESPACE_BEGIN @@ -101,6 +102,7 @@ class DeviceQueue { void debug_init_execution(); void debug_enqueue(DeviceKernel kernel, const int work_size); void debug_synchronize(); + string debug_active_kernels(); /* Combination of kernels enqueued together sync last synchronize. */ DeviceKernelMask last_kernels_enqueued_; |