Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHristo Gueorguiev <prem.nirved@gmail.com>2017-05-03 16:30:45 +0300
committerHristo Gueorguiev <prem.nirved@gmail.com>2017-05-03 16:30:45 +0300
commit6bf4115c13962c99d1cdc97f2be92c4922f3fd33 (patch)
tree569c512a242caf2ea4465f2eef561933ed937a2f /intern/cycles/kernel/kernels
parent6f9c839f444f92c4b0c336a6f5e31cb9660d7dbc (diff)
Cycles: Split kernel - sort shaders
Reduce thread divergence in kernel_shader_eval. Rays are sorted in blocks of 2048 according to shader->id. On R9 290 Classroom is ~30% faster, and Pabellon Barcelone is ~8% faster. No sorting for CUDA split kernel. Reviewers: sergey, maiself Reviewed By: maiself Differential Revision: https://developer.blender.org/D2598
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h8
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu6
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl3
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl27
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl28
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split.cl2
7 files changed, 72 insertions, 4 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 896b80d783e..39c9a9cf33c 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -77,6 +77,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort)
DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 96f54bb427e..8c05dd1d9ef 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -44,6 +44,8 @@
# include "kernel/split/kernel_do_volume.h"
# include "kernel/split/kernel_queue_enqueue.h"
# include "kernel/split/kernel_indirect_background.h"
+# include "kernel/split/kernel_shader_setup.h"
+# include "kernel/split/kernel_shader_sort.h"
# include "kernel/split/kernel_shader_eval.h"
# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "kernel/split/kernel_subsurface_scatter.h"
@@ -181,7 +183,9 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
@@ -209,6 +213,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
REGISTER(do_volume);
REGISTER(queue_enqueue);
REGISTER(indirect_background);
+ REGISTER(shader_setup);
+ REGISTER(shader_sort);
REGISTER(shader_eval);
REGISTER(holdout_emission_blurring_pathtermination_ao);
REGISTER(subsurface_scatter);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 585b91876a9..8b7f1a8d405 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -31,6 +31,8 @@
#include "kernel/split/kernel_do_volume.h"
#include "kernel/split/kernel_queue_enqueue.h"
#include "kernel/split/kernel_indirect_background.h"
+#include "kernel/split/kernel_shader_setup.h"
+#include "kernel/split/kernel_shader_sort.h"
#include "kernel/split/kernel_shader_eval.h"
#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
#include "kernel/split/kernel_subsurface_scatter.h"
@@ -108,7 +110,9 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index 6baee460986..5bfb31b193a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_shader_eval(
ccl_global char *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics);
+ kernel_shader_eval((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
new file mode 100644
index 00000000000..38bfd04ad4c
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_shader_setup.h"
+
+__kernel void kernel_ocl_path_trace_shader_setup(
+ ccl_global char *kg,
+ ccl_constant KernelData *data)
+{
+ ccl_local unsigned int local_queue_atomics;
+ kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
new file mode 100644
index 00000000000..6f722915d45
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_shader_sort.h"
+
+__attribute__((reqd_work_group_size(64, 1, 1)))
+__kernel void kernel_ocl_path_trace_shader_sort(
+ ccl_global char *kg,
+ ccl_constant KernelData *data)
+{
+ ccl_local ShaderSortLocals locals;
+ kernel_shader_sort((KernelGlobals*)kg, &locals);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 732cda30115..8de82db7afe 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -23,6 +23,8 @@
#include "kernel/kernels/opencl/kernel_do_volume.cl"
#include "kernel/kernels/opencl/kernel_indirect_background.cl"
#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
+#include "kernel/kernels/opencl/kernel_shader_setup.cl"
+#include "kernel/kernels/opencl/kernel_shader_sort.cl"
#include "kernel/kernels/opencl/kernel_shader_eval.cl"
#include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl"
#include "kernel/kernels/opencl/kernel_subsurface_scatter.cl"