diff options
Diffstat (limited to 'intern/cycles/device/queue.cpp')
-rw-r--r-- | intern/cycles/device/queue.cpp | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/intern/cycles/device/queue.cpp b/intern/cycles/device/queue.cpp index cc0cf0ccf84..8c997b184b6 100644 --- a/intern/cycles/device/queue.cpp +++ b/intern/cycles/device/queue.cpp @@ -12,9 +12,13 @@ CCL_NAMESPACE_BEGIN DeviceQueue::DeviceQueue(Device *device) - : device(device), last_kernels_enqueued_(0), last_sync_time_(0.0) + : device(device), + last_kernels_enqueued_(0), + last_sync_time_(0.0), + is_per_kernel_performance_(false) { DCHECK_NE(device, nullptr); + is_per_kernel_performance_ = getenv("CYCLES_DEBUG_PER_KERNEL_PERFORMANCE"); } DeviceQueue::~DeviceQueue() @@ -33,11 +37,17 @@ DeviceQueue::~DeviceQueue() }); VLOG_DEVICE_STATS << "GPU queue stats:"; + double total_time = 0.0; for (const auto &[mask, time] : stats_sorted) { + total_time += time; VLOG_DEVICE_STATS << " " << std::setfill(' ') << std::setw(10) << std::fixed << std::setprecision(5) << std::right << time << "s: " << device_kernel_mask_as_string(mask); } + + if (is_per_kernel_performance_) + VLOG_DEVICE_STATS << "GPU queue total time: " << std::fixed << std::setprecision(5) + << total_time; } } @@ -50,7 +60,7 @@ void DeviceQueue::debug_init_execution() last_kernels_enqueued_ = 0; } -void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) +void DeviceQueue::debug_enqueue_begin(DeviceKernel kernel, const int work_size) { if (VLOG_DEVICE_STATS_IS_ON) { VLOG_DEVICE_STATS << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size " @@ -60,6 +70,13 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel); } +void DeviceQueue::debug_enqueue_end() +{ + if (VLOG_DEVICE_STATS_IS_ON && is_per_kernel_performance_) { + synchronize(); + } +} + void DeviceQueue::debug_synchronize() { if (VLOG_DEVICE_STATS_IS_ON) { @@ -67,7 +84,11 @@ void DeviceQueue::debug_synchronize() const double elapsed_time = new_time - last_sync_time_; VLOG_DEVICE_STATS << "GPU queue synchronize, elapsed " << 
std::setw(10) << elapsed_time << "s"; - stats_kernel_time_[last_kernels_enqueued_] += elapsed_time; + /* There is no point in having entries in the performance data + * container without related kernel information. */ + if (last_kernels_enqueued_ != 0) { + stats_kernel_time_[last_kernels_enqueued_] += elapsed_time; + } last_sync_time_ = new_time; } |