From 15edda3a8e07003bef695cca939744bbea80ad18 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 19 Feb 2019 16:31:31 +0100 Subject: T61463: Separate Baking kernels Cycles OpenCL: Split baking kernels in own program Fix T61463. Before this patch baking was part of the base kernels. There are 3 baking kernels that and all 3 uses shader evaluation. Only for one of these kernels the functionality was wrapped in the __NO_BAKING__ compile directive. When you start baking this leads to long compile times. By separating in individual programs will reduce the compile times. Also wrapped all baking kernels with __NO_BAKING__ to reduce the compilation times. Impact on compilation time job | scene_name | previous | new | percentage --------+-----------------+----------+-------+------------ T61463 | empty | 10.63 | 7.27 | 32% T61463 | bmw | 17.91 | 14.24 | 20% T61463 | fishycat | 19.57 | 15.08 | 23% T61463 | barbershop | 54.10 | 48.18 | 11% T61463 | classroom | 17.55 | 14.42 | 18% T61463 | koro | 18.92 | 17.15 | 9% T61463 | pavillion | 17.43 | 14.23 | 18% T61463 | splash279 | 16.48 | 15.33 | 7% T61463 | volume_emission | 36.22 | 34.19 | 6% Impact on render time job | scene_name | previous | new | percentage --------+-----------------+----------+---------+------------ T61463 | empty | 21.06 | 20.54 | 2% T61463 | bmw | 198.44 | 189.59 | 4% T61463 | fishycat | 394.20 | 388.50 | 1% T61463 | barbershop | 1188.16 | 1185.49 | 0% T61463 | classroom | 341.08 | 339.27 | 1% T61463 | koro | 472.43 | 360.70 | 24% T61463 | pavillion | 905.77 | 902.14 | 0% T61463 | splash279 | 55.26 | 54.92 | 1% T61463 | volume_emission | 62.59 | 39.09 | 38% I don't have a grounded explanation why koro and volume_emission is this much faster; I have done several tests though... Maniphest Tasks: T61463 Differential Revision: https://developer.blender.org/D4376 --- intern/cycles/device/opencl/opencl.h | 8 ++- intern/cycles/device/opencl/opencl_base.cpp | 32 +++++++--- intern/cycles/device/opencl/opencl_split.cpp | 2 +- intern/cycles/kernel/CMakeLists.txt | 3 + intern/cycles/kernel/kernels/opencl/kernel.cl | 72 ---------------------- .../kernel/kernels/opencl/kernel_background.cl | 39 ++++++++++++ intern/cycles/kernel/kernels/opencl/kernel_bake.cl | 38 ++++++++++++ .../kernel/kernels/opencl/kernel_displace.cl | 40 ++++++++++++ 8 files changed, 149 insertions(+), 85 deletions(-) create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_background.cl create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_bake.cl create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_displace.cl (limited to 'intern') diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index a2c0e53b3e7..766b9e4bf1a 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -325,7 +325,11 @@ public: map kernels; }; - OpenCLProgram base_program, denoising_program; + OpenCLProgram base_program; + OpenCLProgram bake_program; + OpenCLProgram displace_program; + OpenCLProgram background_program; + OpenCLProgram denoising_program; typedef map*> ConstMemMap; typedef map MemMap; @@ -571,7 +575,7 @@ protected: ustring key, thread_scoped_lock& cache_locker); - virtual string build_options_for_base_program( + virtual string build_options_for_bake_program( const DeviceRequestedFeatures& /*requested_features*/); private: diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index d8f9a242ac8..6a47a60e915 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase() } base_program.release(); + bake_program.release(); + displace_program.release(); + background_program.release(); if(cqCommandQueue) clReleaseCommandQueue(cqCommandQueue); if(cxContext) @@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea if(!opencl_version_check()) return false; - base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features)); + base_program = OpenCLProgram(this, "base", "kernel.cl", ""); base_program.add_kernel(ustring("convert_to_byte")); base_program.add_kernel(ustring("convert_to_half_float")); - base_program.add_kernel(ustring("displace")); - base_program.add_kernel(ustring("background")); - base_program.add_kernel(ustring("bake")); base_program.add_kernel(ustring("zero_buffer")); + bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features)); + bake_program.add_kernel(ustring("bake")); + + displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features)); + displace_program.add_kernel(ustring("displace")); + + background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features)); + background_program.add_kernel(ustring("background")); + denoising_program = OpenCLProgram(this, "denoising", "filter.cl", ""); denoising_program.add_kernel(ustring("filter_divide_shadow")); denoising_program.add_kernel(ustring("filter_get_feature")); @@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea denoising_program.add_kernel(ustring("filter_finalize")); vector programs; - programs.push_back(&base_program); - programs.push_back(&denoising_program); + programs.push_back(&bake_program); + programs.push_back(&displace_program); + programs.push_back(&background_program); /* Call actual class to fill the vector with its programs. */ if(!add_kernel_programs(requested_features, programs)) { return false; } + programs.push_back(&base_program); + programs.push_back(&denoising_program); /* Parallel compilation of Cycles kernels, this launches multiple * processes to workaround OpenCL frameworks serializing the calls @@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task) cl_kernel kernel; if(task.shader_eval_type >= SHADER_EVAL_BAKE) { - kernel = base_program(ustring("bake")); + kernel = bake_program(ustring("bake")); } else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) { - kernel = base_program(ustring("displace")); + kernel = displace_program(ustring("displace")); } else { - kernel = base_program(ustring("background")); + kernel = background_program(ustring("background")); } cl_uint start_arg_index = @@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel( cache_locker); } -string OpenCLDeviceBase::build_options_for_base_program( +string OpenCLDeviceBase::build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { /* TODO(sergey): By default we compile all features, meaning diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index b759f69d3ab..c9d3eb2eb8c 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -327,7 +327,7 @@ public: protected: /* ** Those guys are for workign around some compiler-specific bugs ** */ - string build_options_for_base_program( + string build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { return requested_features.get_build_options(); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f7041ee2783..0a2acd3f669 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS set(SRC_OPENCL_KERNELS kernels/opencl/kernel.cl + kernels/opencl/kernel_bake.cl + kernels/opencl/kernel_displace.cl + kernels/opencl/kernel_background.cl kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_split.cl kernels/opencl/kernel_split_bundle.cl diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index de1f5088629..aa837e2ae87 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace( #else /* __COMPILE_ONLY_MEGAKERNEL__ */ -__kernel void kernel_ocl_displace( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_displace_evaluate(kg, input, output, x); - } -} -__kernel void kernel_ocl_background( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_background_evaluate(kg, input, output, x); - } -} - -__kernel void kernel_ocl_bake( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int filter, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { -#ifdef __NO_BAKING__ - output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#else - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); -#endif - } -} - __kernel void kernel_ocl_convert_to_byte( ccl_constant KernelData *data, ccl_global uchar4 *rgba, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl new file mode 100644 index 00000000000..c7c709c0ad7 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background.cl @@ -0,0 +1,39 @@ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h" + +__kernel void kernel_ocl_background( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_background_evaluate(kg, input, output, x); +#endif + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl new file mode 100644 index 00000000000..041312b53cb --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl @@ -0,0 +1,38 @@ +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h" + +__kernel void kernel_ocl_bake( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int filter, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); +#endif + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl new file mode 100644 index 00000000000..288bfd5eadc --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl @@ -0,0 +1,40 @@ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h + +__kernel void kernel_ocl_displace( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_displace_evaluate(kg, input, output, x); +#endif + } +} + -- cgit v1.2.3