diff options
Diffstat (limited to 'intern/cycles/kernel/device/gpu/work_stealing.h')
-rw-r--r-- | intern/cycles/kernel/device/gpu/work_stealing.h | 25 |
1 files changed, 14 insertions, 11 deletions
diff --git a/intern/cycles/kernel/device/gpu/work_stealing.h b/intern/cycles/kernel/device/gpu/work_stealing.h index fab0915c38e..c3083948057 100644 --- a/intern/cycles/kernel/device/gpu/work_stealing.h +++ b/intern/cycles/kernel/device/gpu/work_stealing.h @@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile, ccl_private uint *y, ccl_private uint *sample) { -#if 0 - /* Keep threads for the same sample together. */ - uint tile_pixels = tile->w * tile->h; - uint sample_offset = global_work_index / tile_pixels; - uint pixel_offset = global_work_index - sample_offset * tile_pixels; -#else - /* Keeping threads for the same pixel together. - * Appears to improve performance by a few % on CUDA and OptiX. */ - uint sample_offset = global_work_index % tile->num_samples; - uint pixel_offset = global_work_index / tile->num_samples; -#endif + uint sample_offset, pixel_offset; + + if (kernel_data.integrator.scrambling_distance < 0.9f) { + /* Keep threads for the same sample together. */ + uint tile_pixels = tile->w * tile->h; + sample_offset = global_work_index / tile_pixels; + pixel_offset = global_work_index - sample_offset * tile_pixels; + } + else { + /* Keeping threads for the same pixel together. + * Appears to improve performance by a few % on CUDA and OptiX. */ + sample_offset = global_work_index % tile->num_samples; + pixel_offset = global_work_index / tile->num_samples; + } uint y_offset = pixel_offset / tile->w; uint x_offset = pixel_offset - y_offset * tile->w; |