Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/integrator/path_trace_work_gpu.cpp3
-rw-r--r--intern/cycles/integrator/tile.cpp5
-rw-r--r--intern/cycles/integrator/tile.h3
-rw-r--r--intern/cycles/integrator/work_tile_scheduler.cpp7
-rw-r--r--intern/cycles/integrator/work_tile_scheduler.h6
-rw-r--r--intern/cycles/kernel/device/gpu/work_stealing.h25
-rw-r--r--intern/cycles/test/integrator_tile_test.cpp16
7 files changed, 41 insertions, 24 deletions
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index b9784f68f56..aff21ef59bb 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -257,7 +257,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
-
+ work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
+ 0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,
diff --git a/intern/cycles/integrator/tile.cpp b/intern/cycles/integrator/tile.cpp
index 4a1558cce09..e9a3cbd38aa 100644
--- a/intern/cycles/integrator/tile.cpp
+++ b/intern/cycles/integrator/tile.cpp
@@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
return next_power_of_two(x);
}
-TileSize tile_calculate_best_size(const int2 &image_size,
+TileSize tile_calculate_best_size(const bool accel_rt,
+ const int2 &image_size,
const int num_samples,
const int max_num_path_states,
const float scrambling_distance)
@@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
TileSize tile_size;
const int num_path_states_per_sample = max_num_path_states / num_samples;
- if (scrambling_distance < 0.9f) {
+ if (scrambling_distance < 0.9f && accel_rt) {
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
tile_size.width = min(image_size.x, max_num_path_states);
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
diff --git a/intern/cycles/integrator/tile.h b/intern/cycles/integrator/tile.h
index 61f7d736115..05b1e0af6b1 100644
--- a/intern/cycles/integrator/tile.h
+++ b/intern/cycles/integrator/tile.h
@@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
* of active path states.
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
* possible, and have as many threads active for every tile as possible. */
-TileSize tile_calculate_best_size(const int2 &image_size,
+TileSize tile_calculate_best_size(const bool accel_rt,
+ const int2 &image_size,
const int num_samples,
const int max_num_path_states,
const float scrambling_distance);
diff --git a/intern/cycles/integrator/work_tile_scheduler.cpp b/intern/cycles/integrator/work_tile_scheduler.cpp
index 2d1ac07db7f..cac573dfeda 100644
--- a/intern/cycles/integrator/work_tile_scheduler.cpp
+++ b/intern/cycles/integrator/work_tile_scheduler.cpp
@@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler()
{
}
+void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
+{
+ accelerated_rt_ = accelerated_rt;
+}
+
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
{
max_num_path_states_ = max_num_path_states;
@@ -59,7 +64,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
void WorkTileScheduler::reset_scheduler_state()
{
tile_size_ = tile_calculate_best_size(
- image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
+ accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
VLOG(3) << "Will schedule tiles of size " << tile_size_;
diff --git a/intern/cycles/integrator/work_tile_scheduler.h b/intern/cycles/integrator/work_tile_scheduler.h
index d9fa7e84431..8aa2f8e90bd 100644
--- a/intern/cycles/integrator/work_tile_scheduler.h
+++ b/intern/cycles/integrator/work_tile_scheduler.h
@@ -31,6 +31,9 @@ class WorkTileScheduler {
public:
WorkTileScheduler();
+ /* To indicate if there is accelerated RT support. */
+ void set_accelerated_rt(bool state);
+
/* MAximum path states which are allowed to be used by a single scheduled work tile.
*
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
@@ -54,6 +57,9 @@ class WorkTileScheduler {
protected:
void reset_scheduler_state();
+ /* Used to indicate if there is accelerated ray tracing. */
+ bool accelerated_rt_ = false;
+
/* Maximum allowed path states to be used.
*
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
diff --git a/intern/cycles/kernel/device/gpu/work_stealing.h b/intern/cycles/kernel/device/gpu/work_stealing.h
index fab0915c38e..c3083948057 100644
--- a/intern/cycles/kernel/device/gpu/work_stealing.h
+++ b/intern/cycles/kernel/device/gpu/work_stealing.h
@@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
ccl_private uint *y,
ccl_private uint *sample)
{
-#if 0
- /* Keep threads for the same sample together. */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#else
- /* Keeping threads for the same pixel together.
- * Appears to improve performance by a few % on CUDA and OptiX. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#endif
+ uint sample_offset, pixel_offset;
+
+ if (kernel_data.integrator.scrambling_distance < 0.9f) {
+ /* Keep threads for the same sample together. */
+ uint tile_pixels = tile->w * tile->h;
+ sample_offset = global_work_index / tile_pixels;
+ pixel_offset = global_work_index - sample_offset * tile_pixels;
+ }
+ else {
+ /* Keeping threads for the same pixel together.
+ * Appears to improve performance by a few % on CUDA and OptiX. */
+ sample_offset = global_work_index % tile->num_samples;
+ pixel_offset = global_work_index / tile->num_samples;
+ }
uint y_offset = pixel_offset / tile->w;
uint x_offset = pixel_offset - y_offset * tile->w;
diff --git a/intern/cycles/test/integrator_tile_test.cpp b/intern/cycles/test/integrator_tile_test.cpp
index 8bb0856d6a9..822c34c36bf 100644
--- a/intern/cycles/test/integrator_tile_test.cpp
+++ b/intern/cycles/test/integrator_tile_test.cpp
@@ -24,26 +24,26 @@ CCL_NAMESPACE_BEGIN
TEST(tile_calculate_best_size, Basic)
{
/* Make sure CPU-like case is handled properly. */
- EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
- EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
/* Enough path states to fit an entire image with all samples. */
- EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
TileSize(1920, 1080, 1));
- EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
TileSize(1920, 1080, 100));
}
TEST(tile_calculate_best_size, Extreme)
{
- EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 262144, 131072, 1.0f),
TileSize(1, 1, 512));
- EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 1048576, 131072, 1.0f),
TileSize(1, 1, 1024));
- EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 10485760, 131072, 1.0f),
TileSize(1, 1, 4096));
- EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
+ EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
TileSize(1, 1, 1024));
}