diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-02-15 10:18:38 +0300 |
---|---|---|
committer | Jeroen Bakker <j.bakker@atmind.nl> | 2019-02-15 10:56:20 +0300 |
commit | 9800837b987930e6152c2dc27cae5bd55873d306 (patch) | |
tree | 5a70a6bbf34c9141a6912675db86a166ab2ab420 /intern/cycles/device/opencl/opencl_split.cpp | |
parent | de0e456a6c7d6da065d275104bc2022b69874648 (diff) |
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
Diffstat (limited to 'intern/cycles/device/opencl/opencl_split.cpp')
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 171 |
1 files changed, 158 insertions, 13 deletions
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 5a2555f9f80..b759f69d3ab 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -79,6 +79,27 @@ public: OpenCLProgram program_data_init; OpenCLProgram program_state_buffer_size; + OpenCLProgram program_split; + + OpenCLProgram program_path_init; + OpenCLProgram program_scene_intersect; + OpenCLProgram program_lamp_emission; + OpenCLProgram program_do_volume; + OpenCLProgram program_queue_enqueue; + OpenCLProgram program_indirect_background; + OpenCLProgram program_shader_setup; + OpenCLProgram program_shader_sort; + OpenCLProgram program_shader_eval; + OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; + OpenCLProgram program_subsurface_scatter; + OpenCLProgram program_direct_lighting; + OpenCLProgram program_shadow_blocked_ao; + OpenCLProgram program_shadow_blocked_dl; + OpenCLProgram program_enqueue_inactive; + OpenCLProgram program_next_iteration_setup; + OpenCLProgram program_indirect_subsurface; + OpenCLProgram program_buffer_update; + OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_); ~OpenCLDeviceSplitKernel() @@ -99,26 +120,150 @@ public: return BVH_LAYOUT_BVH2; } - virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features) + { + if (!OpenCLDeviceBase::load_kernels(requested_features)) { + return false; + } + return split_kernel->load_kernels(requested_features); + } + + const string fast_compiled_kernels = + "path_init " + "scene_intersect " + "queue_enqueue " + "shader_setup " + "shader_sort " + "enqueue_inactive " + "next_iteration_setup " + "indirect_subsurface " + "buffer_update"; + + const string get_opencl_program_name(bool single_program, const string& kernel_name) + { + if (single_program) { + return "split"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "split_bundle"; + } + else { + return "split_" + kernel_name; + } + } + } + + const string get_opencl_program_filename(bool single_program, const string& kernel_name) + { + if (single_program) { + return "kernel_split.cl"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "kernel_split_bundle.cl"; + } + else { + return "kernel_" + kernel_name + ".cl"; + } + } + } + + virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features, vector<OpenCLDeviceBase::OpenCLProgram*> &programs) { bool single_program = OpenCLInfo::use_single_program(); - program_data_init = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_data_init", - single_program ? "kernel_split.cl" : "kernel_data_init.cl", - get_build_options(this, requested_features)); - + program_data_init = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "data_init"), + get_opencl_program_filename(single_program, "data_init"), + get_build_options(this, requested_features)); program_data_init.add_kernel(ustring("path_trace_data_init")); programs.push_back(&program_data_init); - program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_state_buffer_size", - single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl", - get_build_options(this, requested_features)); + program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "state_buffer_size"), + get_opencl_program_filename(single_program, "state_buffer_size"), + get_build_options(this, requested_features)); + program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); programs.push_back(&program_state_buffer_size); - return split_kernel->load_kernels(requested_features); + +#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); +#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \ + program_##kernel_name = \ + OpenCLDeviceBase::OpenCLProgram(this, \ + "split_"#kernel_name, \ + "kernel_"#kernel_name".cl", \ + get_build_options(this, requested_features)); \ + program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ + programs.push_back(&program_##kernel_name); + + if (single_program) { + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split" , + "kernel_split.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + + programs.push_back(&program_split); + } + else { + /* Ordered with most complex kernels first, to reduce overall compile time. */ + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); + + /* Quick kernels bundled in a single program to reduce overhead of starting + * Blender processes. */ + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split_bundle" , + "kernel_split_bundle.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + programs.push_back(&program_split); + } +#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM +#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM + + return true; } void thread_run(DeviceTask *task) @@ -281,8 +426,8 @@ public: bool single_program = OpenCLInfo::use_single_program(); kernel->program = OpenCLDeviceBase::OpenCLProgram(device, - single_program ? "split" : "split_" + kernel_name, - single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl", + device->get_opencl_program_name(single_program, kernel_name), + device->get_opencl_program_filename(single_program, kernel_name), get_build_options(device, requested_features)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); |