diff options
-rw-r--r-- | intern/cycles/kernel/kernel_work_stealing.h | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h index 0c2d9379b63..9667156eaf5 100644 --- a/intern/cycles/kernel/kernel_work_stealing.h +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -66,9 +66,15 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, ccl_private uint *y, ccl_private uint *sample) { +#ifdef __KERNEL_CUDA__ + /* Keeping threads for the same pixel together improves performance on CUDA. */ + uint sample_offset = global_work_index % tile->num_samples; + uint pixel_offset = global_work_index / tile->num_samples; +#else /* __KERNEL_CUDA__ */ uint tile_pixels = tile->w * tile->h; uint sample_offset = global_work_index / tile_pixels; uint pixel_offset = global_work_index - sample_offset * tile_pixels; +#endif /* __KERNEL_CUDA__ */ uint y_offset = pixel_offset / tile->w; uint x_offset = pixel_offset - y_offset * tile->w; |