From b41c72b710d4013fd6d67dc49a8ebb2a416b4462 Mon Sep 17 00:00:00 2001 From: Alaska Date: Thu, 25 Nov 2021 09:20:28 +0100 Subject: Fix performance decrease with Scrambling Distance on With the current code in master, when scrambling distance is enabled, devices without hardware accelerated ray tracing see a measurable performance decrease compared to rendering with scrambling distance off. From testing, this performance decrease comes from the large tile sizes scheduled in `tile.cpp`. This patch attempts to address the performance decrease by using different algorithms to calculate the tile size for devices with hardware accelerated ray traversal and devices without. Large tile sizes are used for hardware accelerated devices and small tile sizes for others. Most of this code is based on proposals from @brecht and @leesonw. Reviewed By: brecht, leesonw Differential Revision: https://developer.blender.org/D13042 --- intern/cycles/integrator/path_trace_work_gpu.cpp | 3 ++- intern/cycles/integrator/tile.cpp | 5 +++-- intern/cycles/integrator/tile.h | 3 ++- intern/cycles/integrator/work_tile_scheduler.cpp | 7 ++++++- intern/cycles/integrator/work_tile_scheduler.h | 6 ++++++ 5 files changed, 19 insertions(+), 5 deletions(-) (limited to 'intern/cycles/integrator') diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index b9784f68f56..aff21ef59bb 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -257,7 +257,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics, * become busy after adding new tiles). This is especially important for the shadow catcher which * schedules work in halves of available number of paths. 
*/ work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8); - + work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) != + 0); work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num, diff --git a/intern/cycles/integrator/tile.cpp b/intern/cycles/integrator/tile.cpp index 4a1558cce09..e9a3cbd38aa 100644 --- a/intern/cycles/integrator/tile.cpp +++ b/intern/cycles/integrator/tile.cpp @@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x) return next_power_of_two(x); } -TileSize tile_calculate_best_size(const int2 &image_size, +TileSize tile_calculate_best_size(const bool accel_rt, + const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance) @@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size, TileSize tile_size; const int num_path_states_per_sample = max_num_path_states / num_samples; - if (scrambling_distance < 0.9f) { + if (scrambling_distance < 0.9f && accel_rt) { /* Prefer large tiles for scrambling distance, bounded by max num path states. */ tile_size.width = min(image_size.x, max_num_path_states); tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1)); diff --git a/intern/cycles/integrator/tile.h b/intern/cycles/integrator/tile.h index 61f7d736115..05b1e0af6b1 100644 --- a/intern/cycles/integrator/tile.h +++ b/intern/cycles/integrator/tile.h @@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size); * of active path states. * Will attempt to provide best guess to keep path tracing threads of a device as localized as * possible, and have as many threads active for every tile as possible. 
*/ -TileSize tile_calculate_best_size(const int2 &image_size, +TileSize tile_calculate_best_size(const bool accel_rt, + const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance); diff --git a/intern/cycles/integrator/work_tile_scheduler.cpp b/intern/cycles/integrator/work_tile_scheduler.cpp index 2d1ac07db7f..cac573dfeda 100644 --- a/intern/cycles/integrator/work_tile_scheduler.cpp +++ b/intern/cycles/integrator/work_tile_scheduler.cpp @@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler() { } +void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt) +{ + accelerated_rt_ = accelerated_rt; +} + void WorkTileScheduler::set_max_num_path_states(int max_num_path_states) { max_num_path_states_ = max_num_path_states; @@ -59,7 +64,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params, void WorkTileScheduler::reset_scheduler_state() { tile_size_ = tile_calculate_best_size( - image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_); + accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_); VLOG(3) << "Will schedule tiles of size " << tile_size_; diff --git a/intern/cycles/integrator/work_tile_scheduler.h b/intern/cycles/integrator/work_tile_scheduler.h index d9fa7e84431..8aa2f8e90bd 100644 --- a/intern/cycles/integrator/work_tile_scheduler.h +++ b/intern/cycles/integrator/work_tile_scheduler.h @@ -31,6 +31,9 @@ class WorkTileScheduler { public: WorkTileScheduler(); + /* To indicate if there is accelerated RT support. */ + void set_accelerated_rt(bool state); + /* MAximum path states which are allowed to be used by a single scheduled work tile. * * Affects the scheduled work size: the work size will be as big as possible, but will not exceed @@ -54,6 +57,9 @@ class WorkTileScheduler { protected: void reset_scheduler_state(); + /* Used to indicate if there is accelerated ray tracing. 
*/ + bool accelerated_rt_ = false; + /* Maximum allowed path states to be used. * * TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the -- cgit v1.2.3