diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-05-03 16:30:45 +0300 |
---|---|---|
committer | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-05-03 16:30:45 +0300 |
commit | 6bf4115c13962c99d1cdc97f2be92c4922f3fd33 (patch) | |
tree | 569c512a242caf2ea4465f2eef561933ed937a2f /intern/cycles/device | |
parent | 6f9c839f444f92c4b0c336a6f5e31cb9660d7dbc (diff) |
Cycles: Split kernel - sort shaders
Reduce thread divergence in kernel_shader_eval.
Rays are sorted in blocks of 2048 according to shader->id.
On an R9 290, Classroom renders ~30% faster and Pabellon Barcelona ~8% faster.
Sorting is not applied to the CUDA split kernel.
Reviewers: sergey, maiself
Reviewed By: maiself
Differential Revision: https://developer.blender.org/D2598
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.h | 2 |
2 files changed, 10 insertions, 0 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index bb3089c5418..9118793aad6 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -39,6 +39,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
 	kernel_do_volume = NULL;
 	kernel_queue_enqueue = NULL;
 	kernel_indirect_background = NULL;
+	kernel_shader_setup = NULL;
+	kernel_shader_sort = NULL;
 	kernel_shader_eval = NULL;
 	kernel_holdout_emission_blurring_pathtermination_ao = NULL;
 	kernel_subsurface_scatter = NULL;
@@ -64,6 +66,8 @@ DeviceSplitKernel::~DeviceSplitKernel()
 	delete kernel_do_volume;
 	delete kernel_queue_enqueue;
 	delete kernel_indirect_background;
+	delete kernel_shader_setup;
+	delete kernel_shader_sort;
 	delete kernel_shader_eval;
 	delete kernel_holdout_emission_blurring_pathtermination_ao;
 	delete kernel_subsurface_scatter;
@@ -89,6 +93,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
 	LOAD_KERNEL(do_volume);
 	LOAD_KERNEL(queue_enqueue);
 	LOAD_KERNEL(indirect_background);
+	LOAD_KERNEL(shader_setup);
+	LOAD_KERNEL(shader_sort);
 	LOAD_KERNEL(shader_eval);
 	LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
 	LOAD_KERNEL(subsurface_scatter);
@@ -241,6 +247,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 	ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
 	ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
 	ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+	ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
+	ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
 	ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
 	ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
 	ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 55548122c0c..58c2fdbb077 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -61,6 +61,8 @@ private:
 	SplitKernelFunction *kernel_do_volume;
 	SplitKernelFunction *kernel_queue_enqueue;
 	SplitKernelFunction *kernel_indirect_background;
+	SplitKernelFunction *kernel_shader_setup;
+	SplitKernelFunction *kernel_shader_sort;
 	SplitKernelFunction *kernel_shader_eval;
 	SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
 	SplitKernelFunction *kernel_subsurface_scatter;