diff options
author | Jeroen Bakker <j.bakker@atmind.nl> | 2019-02-19 18:31:31 +0300 |
---|---|---|
committer | Jeroen Bakker <j.bakker@atmind.nl> | 2019-02-19 18:33:50 +0300 |
commit | 15edda3a8e07003bef695cca939744bbea80ad18 (patch) | |
tree | 24ea0c20bb0ef6e1a2ea8d771c12d3e86c2ecbd6 /intern/cycles/device | |
parent | d6d306441fee0fbead3d48ba08166c31895de8ae (diff) |
T61463: Separate Baking kernels
Cycles OpenCL: Split baking kernels in own program
Fix T61463. Before this patch baking was part of the base kernels. There
are 3 baking kernels that and all 3 uses shader evaluation. Only for one
of these kernels the functionality was wrapped in the __NO_BAKING__
compile directive.
When you start baking this leads to long compile times. By separating
in individual programs will reduce the compile times.
Also wrapped all baking kernels with __NO_BAKING__ to reduce the
compilation times.
Impact on compilation time
job | scene_name | previous | new | percentage
--------+-----------------+----------+-------+------------
T61463 | empty | 10.63 | 7.27 | 32%
T61463 | bmw | 17.91 | 14.24 | 20%
T61463 | fishycat | 19.57 | 15.08 | 23%
T61463 | barbershop | 54.10 | 48.18 | 11%
T61463 | classroom | 17.55 | 14.42 | 18%
T61463 | koro | 18.92 | 17.15 | 9%
T61463 | pavillion | 17.43 | 14.23 | 18%
T61463 | splash279 | 16.48 | 15.33 | 7%
T61463 | volume_emission | 36.22 | 34.19 | 6%
Impact on render time
job | scene_name | previous | new | percentage
--------+-----------------+----------+---------+------------
T61463 | empty | 21.06 | 20.54 | 2%
T61463 | bmw | 198.44 | 189.59 | 4%
T61463 | fishycat | 394.20 | 388.50 | 1%
T61463 | barbershop | 1188.16 | 1185.49 | 0%
T61463 | classroom | 341.08 | 339.27 | 1%
T61463 | koro | 472.43 | 360.70 | 24%
T61463 | pavillion | 905.77 | 902.14 | 0%
T61463 | splash279 | 55.26 | 54.92 | 1%
T61463 | volume_emission | 62.59 | 39.09 | 38%
I don't have a grounded explanation why koro and volume_emission is this much
faster; I have done several tests though...
Maniphest Tasks: T61463
Differential Revision: https://developer.blender.org/D4376
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 8 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 32 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 2 |
3 files changed, 29 insertions, 13 deletions
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index a2c0e53b3e7..766b9e4bf1a 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -325,7 +325,11 @@ public: map<ustring, cl_kernel> kernels; }; - OpenCLProgram base_program, denoising_program; + OpenCLProgram base_program; + OpenCLProgram bake_program; + OpenCLProgram displace_program; + OpenCLProgram background_program; + OpenCLProgram denoising_program; typedef map<string, device_vector<uchar>*> ConstMemMap; typedef map<string, device_ptr> MemMap; @@ -571,7 +575,7 @@ protected: ustring key, thread_scoped_lock& cache_locker); - virtual string build_options_for_base_program( + virtual string build_options_for_bake_program( const DeviceRequestedFeatures& /*requested_features*/); private: diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index d8f9a242ac8..6a47a60e915 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase() } base_program.release(); + bake_program.release(); + displace_program.release(); + background_program.release(); if(cqCommandQueue) clReleaseCommandQueue(cqCommandQueue); if(cxContext) @@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea if(!opencl_version_check()) return false; - base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features)); + base_program = OpenCLProgram(this, "base", "kernel.cl", ""); base_program.add_kernel(ustring("convert_to_byte")); base_program.add_kernel(ustring("convert_to_half_float")); - base_program.add_kernel(ustring("displace")); - base_program.add_kernel(ustring("background")); - base_program.add_kernel(ustring("bake")); base_program.add_kernel(ustring("zero_buffer")); + bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features)); + bake_program.add_kernel(ustring("bake")); + + displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features)); + displace_program.add_kernel(ustring("displace")); + + background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features)); + background_program.add_kernel(ustring("background")); + denoising_program = OpenCLProgram(this, "denoising", "filter.cl", ""); denoising_program.add_kernel(ustring("filter_divide_shadow")); denoising_program.add_kernel(ustring("filter_get_feature")); @@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea denoising_program.add_kernel(ustring("filter_finalize")); vector<OpenCLProgram*> programs; - programs.push_back(&base_program); - programs.push_back(&denoising_program); + programs.push_back(&bake_program); + programs.push_back(&displace_program); + programs.push_back(&background_program); /* Call actual class to fill the vector with its programs. */ if(!add_kernel_programs(requested_features, programs)) { return false; } + programs.push_back(&base_program); + programs.push_back(&denoising_program); /* Parallel compilation of Cycles kernels, this launches multiple * processes to workaround OpenCL frameworks serializing the calls @@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task) cl_kernel kernel; if(task.shader_eval_type >= SHADER_EVAL_BAKE) { - kernel = base_program(ustring("bake")); + kernel = bake_program(ustring("bake")); } else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) { - kernel = base_program(ustring("displace")); + kernel = displace_program(ustring("displace")); } else { - kernel = base_program(ustring("background")); + kernel = background_program(ustring("background")); } cl_uint start_arg_index = @@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel( cache_locker); } -string OpenCLDeviceBase::build_options_for_base_program( +string OpenCLDeviceBase::build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { /* TODO(sergey): By default we compile all features, meaning diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index b759f69d3ab..c9d3eb2eb8c 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -327,7 +327,7 @@ public: protected: /* ** Those guys are for workign around some compiler-specific bugs ** */ - string build_options_for_base_program( + string build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { return requested_features.get_build_options(); |