Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorJeroen Bakker <j.bakker@atmind.nl>2019-02-19 18:31:31 +0300
committerJeroen Bakker <j.bakker@atmind.nl>2019-02-19 18:33:50 +0300
commit15edda3a8e07003bef695cca939744bbea80ad18 (patch)
tree24ea0c20bb0ef6e1a2ea8d771c12d3e86c2ecbd6 /intern
parentd6d306441fee0fbead3d48ba08166c31895de8ae (diff)
T61463: Separate Baking kernels
Cycles OpenCL: Split baking kernels in own program Fix T61463. Before this patch baking was part of the base kernels. There are 3 baking kernels that and all 3 uses shader evaluation. Only for one of these kernels the functionality was wrapped in the __NO_BAKING__ compile directive. When you start baking this leads to long compile times. By separating in individual programs will reduce the compile times. Also wrapped all baking kernels with __NO_BAKING__ to reduce the compilation times. Impact on compilation time job | scene_name | previous | new | percentage --------+-----------------+----------+-------+------------ T61463 | empty | 10.63 | 7.27 | 32% T61463 | bmw | 17.91 | 14.24 | 20% T61463 | fishycat | 19.57 | 15.08 | 23% T61463 | barbershop | 54.10 | 48.18 | 11% T61463 | classroom | 17.55 | 14.42 | 18% T61463 | koro | 18.92 | 17.15 | 9% T61463 | pavillion | 17.43 | 14.23 | 18% T61463 | splash279 | 16.48 | 15.33 | 7% T61463 | volume_emission | 36.22 | 34.19 | 6% Impact on render time job | scene_name | previous | new | percentage --------+-----------------+----------+---------+------------ T61463 | empty | 21.06 | 20.54 | 2% T61463 | bmw | 198.44 | 189.59 | 4% T61463 | fishycat | 394.20 | 388.50 | 1% T61463 | barbershop | 1188.16 | 1185.49 | 0% T61463 | classroom | 341.08 | 339.27 | 1% T61463 | koro | 472.43 | 360.70 | 24% T61463 | pavillion | 905.77 | 902.14 | 0% T61463 | splash279 | 55.26 | 54.92 | 1% T61463 | volume_emission | 62.59 | 39.09 | 38% I don't have a grounded explanation why koro and volume_emission is this much faster; I have done several tests though... Maniphest Tasks: T61463 Differential Revision: https://developer.blender.org/D4376
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/opencl/opencl.h8
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp32
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp2
-rw-r--r--intern/cycles/kernel/CMakeLists.txt3
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl72
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_background.cl39
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_bake.cl38
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_displace.cl40
8 files changed, 149 insertions, 85 deletions
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index a2c0e53b3e7..766b9e4bf1a 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -325,7 +325,11 @@ public:
map<ustring, cl_kernel> kernels;
};
- OpenCLProgram base_program, denoising_program;
+ OpenCLProgram base_program;
+ OpenCLProgram bake_program;
+ OpenCLProgram displace_program;
+ OpenCLProgram background_program;
+ OpenCLProgram denoising_program;
typedef map<string, device_vector<uchar>*> ConstMemMap;
typedef map<string, device_ptr> MemMap;
@@ -571,7 +575,7 @@ protected:
ustring key,
thread_scoped_lock& cache_locker);
- virtual string build_options_for_base_program(
+ virtual string build_options_for_bake_program(
const DeviceRequestedFeatures& /*requested_features*/);
private:
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index d8f9a242ac8..6a47a60e915 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
}
base_program.release();
+ bake_program.release();
+ displace_program.release();
+ background_program.release();
if(cqCommandQueue)
clReleaseCommandQueue(cqCommandQueue);
if(cxContext)
@@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
if(!opencl_version_check())
return false;
- base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
+ base_program = OpenCLProgram(this, "base", "kernel.cl", "");
base_program.add_kernel(ustring("convert_to_byte"));
base_program.add_kernel(ustring("convert_to_half_float"));
- base_program.add_kernel(ustring("displace"));
- base_program.add_kernel(ustring("background"));
- base_program.add_kernel(ustring("bake"));
base_program.add_kernel(ustring("zero_buffer"));
+ bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features));
+ bake_program.add_kernel(ustring("bake"));
+
+ displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features));
+ displace_program.add_kernel(ustring("displace"));
+
+ background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features));
+ background_program.add_kernel(ustring("background"));
+
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
@@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program.add_kernel(ustring("filter_finalize"));
vector<OpenCLProgram*> programs;
- programs.push_back(&base_program);
- programs.push_back(&denoising_program);
+ programs.push_back(&bake_program);
+ programs.push_back(&displace_program);
+ programs.push_back(&background_program);
/* Call actual class to fill the vector with its programs. */
if(!add_kernel_programs(requested_features, programs)) {
return false;
}
+ programs.push_back(&base_program);
+ programs.push_back(&denoising_program);
/* Parallel compilation of Cycles kernels, this launches multiple
* processes to workaround OpenCL frameworks serializing the calls
@@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_kernel kernel;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
- kernel = base_program(ustring("bake"));
+ kernel = bake_program(ustring("bake"));
}
else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
- kernel = base_program(ustring("displace"));
+ kernel = displace_program(ustring("displace"));
}
else {
- kernel = base_program(ustring("background"));
+ kernel = background_program(ustring("background"));
}
cl_uint start_arg_index =
@@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel(
cache_locker);
}
-string OpenCLDeviceBase::build_options_for_base_program(
+string OpenCLDeviceBase::build_options_for_bake_program(
const DeviceRequestedFeatures& requested_features)
{
/* TODO(sergey): By default we compile all features, meaning
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index b759f69d3ab..c9d3eb2eb8c 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -327,7 +327,7 @@ public:
protected:
/* ** Those guys are for workign around some compiler-specific bugs ** */
- string build_options_for_base_program(
+ string build_options_for_bake_program(
const DeviceRequestedFeatures& requested_features)
{
return requested_features.get_build_options();
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f7041ee2783..0a2acd3f669 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS
set(SRC_OPENCL_KERNELS
kernels/opencl/kernel.cl
+ kernels/opencl/kernel_bake.cl
+ kernels/opencl/kernel_displace.cl
+ kernels/opencl/kernel_background.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
kernels/opencl/kernel_split_bundle.cl
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index de1f5088629..aa837e2ae87 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace(
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
-__kernel void kernel_ocl_displace(
- ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
-
- KERNEL_BUFFER_PARAMS,
-
- int type, int sx, int sw, int offset, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
-
- if(x < sx + sw) {
- kernel_displace_evaluate(kg, input, output, x);
- }
-}
-__kernel void kernel_ocl_background(
- ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
-
- KERNEL_BUFFER_PARAMS,
-
- int type, int sx, int sw, int offset, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
-
- if(x < sx + sw) {
- kernel_background_evaluate(kg, input, output, x);
- }
-}
-
-__kernel void kernel_ocl_bake(
- ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
-
- KERNEL_BUFFER_PARAMS,
-
- int type, int filter, int sx, int sw, int offset, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
-
- if(x < sx + sw) {
-#ifdef __NO_BAKING__
- output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-#else
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
-#endif
- }
-}
-
__kernel void kernel_ocl_convert_to_byte(
ccl_constant KernelData *data,
ccl_global uchar4 *rgba,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl
new file mode 100644
index 00000000000..c7c709c0ad7
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_background.cl
@@ -0,0 +1,39 @@
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/kernel_math.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_color.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_path_branched.h"
+
+#include "kernel/kernel_bake.h"
+
+__kernel void kernel_ocl_background(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+ KERNEL_BUFFER_PARAMS,
+
+ int type, int sx, int sw, int offset, int sample)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+
+ int x = sx + ccl_global_id(0);
+
+ if(x < sx + sw) {
+#ifdef __NO_BAKING__
+ output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ kernel_background_evaluate(kg, input, output, x);
+#endif
+ }
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
new file mode 100644
index 00000000000..041312b53cb
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
@@ -0,0 +1,38 @@
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/kernel_math.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_color.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_path_branched.h"
+
+#include "kernel/kernel_bake.h"
+
+__kernel void kernel_ocl_bake(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+ KERNEL_BUFFER_PARAMS,
+
+ int type, int filter, int sx, int sw, int offset, int sample)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+
+ int x = sx + ccl_global_id(0);
+
+ if(x < sx + sw) {
+#ifdef __NO_BAKING__
+ output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
+#endif
+ }
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
new file mode 100644
index 00000000000..288bfd5eadc
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
@@ -0,0 +1,40 @@
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/kernel_math.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_color.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_path_branched.h"
+
+#include "kernel/kernel_bake.h
+
+__kernel void kernel_ocl_displace(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+ KERNEL_BUFFER_PARAMS,
+
+ int type, int sx, int sw, int offset, int sample)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+
+ int x = sx + ccl_global_id(0);
+
+ if(x < sx + sw) {
+#ifdef __NO_BAKING__
+ output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ kernel_displace_evaluate(kg, input, output, x);
+#endif
+ }
+}
+