diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-05-03 16:30:45 +0300 |
---|---|---|
committer | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-05-03 16:30:45 +0300 |
commit | 6bf4115c13962c99d1cdc97f2be92c4922f3fd33 (patch) | |
tree | 569c512a242caf2ea4465f2eef561933ed937a2f /intern/cycles/kernel/kernels | |
parent | 6f9c839f444f92c4b0c336a6f5e31cb9660d7dbc (diff) |
Cycles: Split kernel - sort shaders
Reduce thread divergence in kernel_shader_eval.
Rays are sorted in blocks of 2048 according to shader->id.
On R9 290 Classroom is ~30% faster, and Pabellon Barcelone is ~8% faster.
No sorting for CUDA split kernel.
Reviewers: sergey, maiself
Reviewed By: maiself
Differential Revision: https://developer.blender.org/D2598
Diffstat (limited to 'intern/cycles/kernel/kernels')
7 files changed, 72 insertions, 4 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 896b80d783e..39c9a9cf33c 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -77,6 +77,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) DECLARE_SPLIT_KERNEL_FUNCTION(do_volume) DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort) DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 96f54bb427e..8c05dd1d9ef 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -44,6 +44,8 @@ # include "kernel/split/kernel_do_volume.h" # include "kernel/split/kernel_queue_enqueue.h" # include "kernel/split/kernel_indirect_background.h" +# include "kernel/split/kernel_shader_setup.h" +# include "kernel/split/kernel_shader_sort.h" # include "kernel/split/kernel_shader_eval.h" # include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" # include "kernel/split/kernel_subsurface_scatter.h" @@ -181,7 +183,9 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) @@ -209,6 +213,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, REGISTER(do_volume); REGISTER(queue_enqueue); REGISTER(indirect_background); + REGISTER(shader_setup); + REGISTER(shader_sort); REGISTER(shader_eval); REGISTER(holdout_emission_blurring_pathtermination_ao); REGISTER(subsurface_scatter); diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 585b91876a9..8b7f1a8d405 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -31,6 +31,8 @@ #include "kernel/split/kernel_do_volume.h" #include "kernel/split/kernel_queue_enqueue.h" #include "kernel/split/kernel_indirect_background.h" +#include "kernel/split/kernel_shader_setup.h" +#include "kernel/split/kernel_shader_sort.h" #include "kernel/split/kernel_shader_eval.h" #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" #include "kernel/split/kernel_subsurface_scatter.h" @@ -108,7 +110,9 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 6baee460986..5bfb31b193a 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_shader_eval( ccl_global char *kg, ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics; - kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics); + kernel_shader_eval((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl new file mode 100644 index 00000000000..38bfd04ad4c --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl @@ -0,0 +1,27 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_shader_setup.h" + +__kernel void kernel_ocl_path_trace_shader_setup( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + ccl_local unsigned int local_queue_atomics; + kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl new file mode 100644 index 00000000000..6f722915d45 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_shader_sort.h" + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void kernel_ocl_path_trace_shader_sort( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + ccl_local ShaderSortLocals locals; + kernel_shader_sort((KernelGlobals*)kg, &locals); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 732cda30115..8de82db7afe 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -23,6 +23,8 @@ #include "kernel/kernels/opencl/kernel_do_volume.cl" #include "kernel/kernels/opencl/kernel_indirect_background.cl" #include "kernel/kernels/opencl/kernel_queue_enqueue.cl" +#include "kernel/kernels/opencl/kernel_shader_setup.cl" +#include "kernel/kernels/opencl/kernel_shader_sort.cl" #include "kernel/kernels/opencl/kernel_shader_eval.cl" #include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" #include "kernel/kernels/opencl/kernel_subsurface_scatter.cl" |