Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/queue.cpp')
-rw-r--r-- intern/cycles/device/queue.cpp 27
1 files changed, 24 insertions, 3 deletions
diff --git a/intern/cycles/device/queue.cpp b/intern/cycles/device/queue.cpp
index cc0cf0ccf84..8c997b184b6 100644
--- a/intern/cycles/device/queue.cpp
+++ b/intern/cycles/device/queue.cpp
@@ -12,9 +12,13 @@
CCL_NAMESPACE_BEGIN
DeviceQueue::DeviceQueue(Device *device)
- : device(device), last_kernels_enqueued_(0), last_sync_time_(0.0)
+ : device(device),
+ last_kernels_enqueued_(0),
+ last_sync_time_(0.0),
+ is_per_kernel_performance_(false)
{
DCHECK_NE(device, nullptr);
+ is_per_kernel_performance_ = getenv("CYCLES_DEBUG_PER_KERNEL_PERFORMANCE");
}
DeviceQueue::~DeviceQueue()
@@ -33,11 +37,17 @@ DeviceQueue::~DeviceQueue()
});
VLOG_DEVICE_STATS << "GPU queue stats:";
+ double total_time = 0.0;
for (const auto &[mask, time] : stats_sorted) {
+ total_time += time;
VLOG_DEVICE_STATS << " " << std::setfill(' ') << std::setw(10) << std::fixed
<< std::setprecision(5) << std::right << time
<< "s: " << device_kernel_mask_as_string(mask);
}
+
+ if (is_per_kernel_performance_)
+ VLOG_DEVICE_STATS << "GPU queue total time: " << std::fixed << std::setprecision(5)
+ << total_time;
}
}
@@ -50,7 +60,7 @@ void DeviceQueue::debug_init_execution()
last_kernels_enqueued_ = 0;
}
-void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
+void DeviceQueue::debug_enqueue_begin(DeviceKernel kernel, const int work_size)
{
if (VLOG_DEVICE_STATS_IS_ON) {
VLOG_DEVICE_STATS << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
@@ -60,6 +70,13 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
}
+void DeviceQueue::debug_enqueue_end()
+{
+ if (VLOG_DEVICE_STATS_IS_ON && is_per_kernel_performance_) {
+ synchronize();
+ }
+}
+
void DeviceQueue::debug_synchronize()
{
if (VLOG_DEVICE_STATS_IS_ON) {
@@ -67,7 +84,11 @@ void DeviceQueue::debug_synchronize()
const double elapsed_time = new_time - last_sync_time_;
VLOG_DEVICE_STATS << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s";
- stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
+ /* There is no point in having entries in the performance data
+ * container without related kernel information. */
+ if (last_kernels_enqueued_ != 0) {
+ stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
+ }
last_sync_time_ = new_time;
}