Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2019-02-15 10:18:38 +0300
committer: Jeroen Bakker <j.bakker@atmind.nl> 2019-02-15 10:56:20 +0300
commit: 9800837b987930e6152c2dc27cae5bd55873d306 (patch)
tree: 5a70a6bbf34c9141a6912675db86a166ab2ab420 /intern/cycles/device/opencl/opencl_split.cpp
parent: de0e456a6c7d6da065d275104bc2022b69874648 (diff)
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working. So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.

Depends on D2231.

Patch by lukasstockner97, jbakker, brecht

job      | scene_name      | compilation_time
---------+-----------------+------------------
Baseline | empty           | 22.73
D2264    | empty           | 13.94
Baseline | bmw             | 56.44
D2264    | bmw             | 41.32
Baseline | fishycat        | 59.50
D2264    | fishycat        | 45.19
Baseline | barbershop      | 212.28
D2264    | barbershop      | 169.81
Baseline | victor          | 67.51
D2264    | victor          | 53.60
Baseline | classroom       | 51.46
D2264    | classroom       | 39.02
Baseline | koro            | 62.48
D2264    | koro            | 49.03
Baseline | pavillion       | 54.37
D2264    | pavillion       | 38.82
Baseline | splash279       | 47.43
D2264    | splash279       | 37.94
Baseline | volume_emission | 145.22
D2264    | volume_emission | 121.10

This patch reduces compilation time because the split kernels and base kernels are compiled in parallel. In Cycles debug mode (256) you can unmark the OpenCL single program file, which reduces the compilation time even further (bmw 17 seconds, barbershop 53 seconds).

Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97

Reviewed By: brecht

Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli

Differential Revision: https://developer.blender.org/D2264
Diffstat (limited to 'intern/cycles/device/opencl/opencl_split.cpp')
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp171
1 files changed, 158 insertions, 13 deletions
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 5a2555f9f80..b759f69d3ab 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -79,6 +79,27 @@ public:
OpenCLProgram program_data_init;
OpenCLProgram program_state_buffer_size;
+ OpenCLProgram program_split;
+
+ OpenCLProgram program_path_init;
+ OpenCLProgram program_scene_intersect;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_queue_enqueue;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_setup;
+ OpenCLProgram program_shader_sort;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+ OpenCLProgram program_enqueue_inactive;
+ OpenCLProgram program_next_iteration_setup;
+ OpenCLProgram program_indirect_subsurface;
+ OpenCLProgram program_buffer_update;
+
OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+ /* Load the base device kernels first and, only when that succeeds, the
+  * split kernels. Returns false as soon as either step fails. */
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features)
+ {
+ 	/* The && short-circuits, so the split kernels are never requested
+ 	 * after a failed base load. */
+ 	return OpenCLDeviceBase::load_kernels(requested_features) &&
+ 	       split_kernel->load_kernels(requested_features);
+ }
+
+ /* Space-separated names of the split kernels that are compiled together as
+  * the single "split_bundle" program (kernel_split_bundle.cl) instead of
+  * getting one program per kernel. */
+ const string fast_compiled_kernels =
+ 	"path_init "
+ 	"scene_intersect "
+ 	"queue_enqueue "
+ 	"shader_setup "
+ 	"shader_sort "
+ 	"enqueue_inactive "
+ 	"next_iteration_setup "
+ 	"indirect_subsurface "
+ 	"buffer_update";
+
+ /* Return true when kernel_name is exactly one entry of
+  * fast_compiled_kernels.
+  *
+  * A plain substring search over the list would also accept an empty name,
+  * a fragment such as "init", or text spanning two entries, so the name is
+  * matched as a whole space-delimited token instead. */
+ bool is_fast_compiled_kernel(const string& kernel_name) const
+ {
+ 	if (kernel_name.empty() || kernel_name.find(' ') != std::string::npos) {
+ 		return false;
+ 	}
+ 	/* Pad both sides so the first and last entries are delimited like the
+ 	 * ones in the middle. */
+ 	const string padded_kernels = " " + fast_compiled_kernels + " ";
+ 	return padded_kernels.find(" " + kernel_name + " ") != std::string::npos;
+ }
+
+ /* Name of the OpenCL program that contains the given split kernel.
+  *
+  * single_program: when true every kernel lives in the "split" program.
+  * kernel_name: kernel name without the "path_trace_" prefix.
+  *
+  * Returns "split", "split_bundle" for bundled kernels, or
+  * "split_<kernel_name>" otherwise. */
+ const string get_opencl_program_name(bool single_program, const string& kernel_name)
+ {
+ 	if (single_program) {
+ 		return "split";
+ 	}
+ 	if (is_fast_compiled_kernel(kernel_name)) {
+ 		return "split_bundle";
+ 	}
+ 	return "split_" + kernel_name;
+ }
+
+ /* Source file of the OpenCL program that contains the given split kernel.
+  *
+  * single_program: when true every kernel comes from "kernel_split.cl".
+  * kernel_name: kernel name without the "path_trace_" prefix.
+  *
+  * Returns "kernel_split.cl", "kernel_split_bundle.cl" for bundled kernels,
+  * or "kernel_<kernel_name>.cl" otherwise. */
+ const string get_opencl_program_filename(bool single_program, const string& kernel_name)
+ {
+ 	if (single_program) {
+ 		return "kernel_split.cl";
+ 	}
+ 	if (is_fast_compiled_kernel(kernel_name)) {
+ 		return "kernel_split_bundle.cl";
+ 	}
+ 	return "kernel_" + kernel_name + ".cl";
+ }
+
/* Set up the OpenCL programs of the split kernel and append a pointer to each
 * of them to `programs`.
 *
 * The data_init and state_buffer_size programs are always registered. With
 * OpenCLInfo::use_single_program() all eighteen path_trace_* kernels are added
 * to the one "split" program (kernel_split.cl). Otherwise nine kernels get a
 * program of their own and the remaining nine are added to the "split_bundle"
 * program (kernel_split_bundle.cl).
 *
 * This function only fills in the program objects and the `programs` list and
 * always returns true; NOTE(review): the actual compilation presumably happens
 * in the caller, which is not visible here. */
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
{
bool single_program = OpenCLInfo::use_single_program();
- program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_data_init",
- single_program ? "kernel_split.cl" : "kernel_data_init.cl",
- get_build_options(this, requested_features));
-
+ program_data_init = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "data_init"),
+ get_opencl_program_filename(single_program, "data_init"),
+ get_build_options(this, requested_features));
program_data_init.add_kernel(ustring("path_trace_data_init"));
programs.push_back(&program_data_init);
- program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_state_buffer_size",
- single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl",
- get_build_options(this, requested_features));
+ program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "state_buffer_size"),
+ get_opencl_program_filename(single_program, "state_buffer_size"),
+ get_build_options(this, requested_features));
+
program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
programs.push_back(&program_state_buffer_size);
- return split_kernel->load_kernels(requested_features);
+
/* ADD_SPLIT_KERNEL_SINGLE_PROGRAM(name) adds the kernel "path_trace_<name>"
 * to program_split, which must have been assigned beforehand.
 * ADD_SPLIT_KERNEL_SPLIT_PROGRAM(name) assigns program_<name> a program
 * called "split_<name>" built from "kernel_<name>.cl", adds the kernel
 * "path_trace_<name>" to it and appends it to `programs`. */
+#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
+ program_##kernel_name = \
+ OpenCLDeviceBase::OpenCLProgram(this, \
+ "split_"#kernel_name, \
+ "kernel_"#kernel_name".cl", \
+ get_build_options(this, requested_features)); \
+ program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+ programs.push_back(&program_##kernel_name);
+
+ if (single_program) {
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split" ,
+ "kernel_split.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+
+ programs.push_back(&program_split);
+ }
+ else {
+ /* Ordered with most complex kernels first, to reduce overall compile time. */
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval);
+
+ /* Quick kernels bundled in a single program to reduce overhead of starting
+ * Blender processes. */
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split_bundle" ,
+ "kernel_split_bundle.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+ programs.push_back(&program_split);
+ }
+#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM
+#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM
+
+ return true;
}
void thread_run(DeviceTask *task)
@@ -281,8 +426,8 @@ public:
bool single_program = OpenCLInfo::use_single_program();
kernel->program =
OpenCLDeviceBase::OpenCLProgram(device,
- single_program ? "split" : "split_" + kernel_name,
- single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl",
+ device->get_opencl_program_name(single_program, kernel_name),
+ device->get_opencl_program_filename(single_program, kernel_name),
get_build_options(device, requested_features));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));