Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHristo Gueorguiev <prem.nirved@gmail.com>2017-05-03 16:30:45 +0300
committerHristo Gueorguiev <prem.nirved@gmail.com>2017-05-03 16:30:45 +0300
commit6bf4115c13962c99d1cdc97f2be92c4922f3fd33 (patch)
tree569c512a242caf2ea4465f2eef561933ed937a2f /intern/cycles/device
parent6f9c839f444f92c4b0c336a6f5e31cb9660d7dbc (diff)
Cycles: Split kernel - sort shaders
Reduce thread divergence in kernel_shader_eval. Rays are sorted in blocks of 2048 according to shader->id. On R9 290 Classroom is ~30% faster, and Pabellon Barcelone is ~8% faster. No sorting for CUDA split kernel. Reviewers: sergey, maiself Reviewed By: maiself Differential Revision: https://developer.blender.org/D2598
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/device_split_kernel.cpp8
-rw-r--r--intern/cycles/device/device_split_kernel.h2
2 files changed, 10 insertions, 0 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index bb3089c5418..9118793aad6 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -39,6 +39,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
kernel_do_volume = NULL;
kernel_queue_enqueue = NULL;
kernel_indirect_background = NULL;
+ kernel_shader_setup = NULL;
+ kernel_shader_sort = NULL;
kernel_shader_eval = NULL;
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
kernel_subsurface_scatter = NULL;
@@ -64,6 +66,8 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_do_volume;
delete kernel_queue_enqueue;
delete kernel_indirect_background;
+ delete kernel_shader_setup;
+ delete kernel_shader_sort;
delete kernel_shader_eval;
delete kernel_holdout_emission_blurring_pathtermination_ao;
delete kernel_subsurface_scatter;
@@ -89,6 +93,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
LOAD_KERNEL(do_volume);
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background);
+ LOAD_KERNEL(shader_setup);
+ LOAD_KERNEL(shader_sort);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
@@ -241,6 +247,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 55548122c0c..58c2fdbb077 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -61,6 +61,8 @@ private:
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background;
+ SplitKernelFunction *kernel_shader_setup;
+ SplitKernelFunction *kernel_shader_sort;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;