From 4ce9785e01587638ae26256fe23315e436c658ff Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 15 Feb 2019 08:18:38 +0100 Subject: Cycles: Support multithreaded compilation of kernels This patch implements a workaround to get the multithreaded compilation from D2231 working. So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function. Depends on D2231. Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97 Reviewed By: brecht Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli Differential Revision: https://developer.blender.org/D2264 --- intern/cycles/blender/CMakeLists.txt | 4 + intern/cycles/blender/blender_python.cpp | 30 ++++ intern/cycles/device/device_intern.h | 1 + intern/cycles/device/opencl/opencl.h | 20 ++- intern/cycles/device/opencl/opencl_base.cpp | 20 +-- intern/cycles/device/opencl/opencl_mega.cpp | 24 ++- intern/cycles/device/opencl/opencl_split.cpp | 171 +++++++++++++++++++-- intern/cycles/device/opencl/opencl_util.cpp | 140 +++++++++++++++-- intern/cycles/kernel/CMakeLists.txt | 1 + .../kernel/kernels/opencl/kernel_split_bundle.cl | 28 ++++ intern/cycles/util/util_system.cpp | 22 +++ intern/cycles/util/util_system.h | 4 + 12 files changed, 420 insertions(+), 45 deletions(-) create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl (limited to 'intern') diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index 84e2690333e..f8720de366f 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -51,6 +51,10 @@ set(ADDON_FILES add_definitions(${GL_DEFINITIONS}) +if(WITH_CYCLES_DEVICE_OPENCL) + add_definitions(-DWITH_OPENCL) +endif() + if(WITH_CYCLES_NETWORK) add_definitions(-DWITH_NETWORK) endif() diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index de702337f98..a720a60c05b 100644 --- 
a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -40,6 +40,10 @@ #include #endif +#ifdef WITH_OPENCL +#include "device/device_intern.h" +#endif + CCL_NAMESPACE_BEGIN namespace { @@ -628,6 +632,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/) DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE; Py_RETURN_NONE; } + +static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence"); + if(sequence == NULL) { + Py_RETURN_FALSE; + } + + vector parameters; + for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) { + PyObject *item = PySequence_Fast_GET_ITEM(sequence, i); + PyObject *item_as_string = PyObject_Str(item); + const char *parameter_string = PyUnicode_AsUTF8(item_as_string); + parameters.push_back(parameter_string); + Py_DECREF(item_as_string); + } + Py_DECREF(sequence); + + if (device_opencl_compile_kernel(parameters)) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} #endif static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector& filepaths) @@ -903,6 +932,7 @@ static PyMethodDef methods[] = { {"system_info", system_info_func, METH_NOARGS, ""}, #ifdef WITH_OPENCL {"opencl_disable", opencl_disable_func, METH_NOARGS, ""}, + {"opencl_compile", opencl_compile_func, METH_VARARGS, ""}, #endif /* Standalone denoising */ diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h index 0b26057c3ba..94df1e009eb 100644 --- a/intern/cycles/device/device_intern.h +++ b/intern/cycles/device/device_intern.h @@ -24,6 +24,7 @@ class Device; Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); bool device_opencl_init(); Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); +bool device_opencl_compile_kernel(const vector& parameters); bool 
device_cuda_init(); Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address); diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 9b763167459..a2c0e53b3e7 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -268,6 +268,7 @@ public: cl_platform_id cpPlatform; cl_device_id cdDevice; cl_int ciErr; + int device_num; class OpenCLProgram { public: @@ -293,7 +294,15 @@ public: private: bool build_kernel(const string *debug_src); + /* Build the program by launching our own executable as a separate process. + * This is required for multithreaded OpenCL compilation, since most frameworks serialize + * build calls internally if they come from the same process. + * If that is not supported, this function just returns false. + */ + bool compile_separate(const string& clbin); + /* Build the program by calling OpenCL directly. */ bool compile_kernel(const string *debug_src); + /* Loading and saving the program from/to disk. */ bool load_binary(const string& clbin, const string *debug_src = NULL); bool save_binary(const string& clbin); @@ -342,12 +351,17 @@ public: bool opencl_version_check(); string device_md5_hash(string kernel_custom_build_options = ""); - bool load_kernels(const DeviceRequestedFeatures& requested_features); + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features); /* Has to be implemented by the real device classes. * The base device will then load all these programs. 
*/ - virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, - vector &programs) = 0; + virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features, + vector &programs) = 0; + + /* Get the name of the opencl program for the given kernel */ + virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0; + /* Get the program file name to compile (*.cl) for the given kernel */ + virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0; void mem_alloc(device_memory& mem); void mem_copy_to(device_memory& mem); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 4417065bb7f..d8f9a242ac8 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro } assert(info.num < usable_devices.size()); OpenCLPlatformDevice& platform_device = usable_devices[info.num]; + device_num = info.num; cpPlatform = platform_device.platform_id; cdDevice = platform_device.device_id; platform_name = platform_device.platform_name; @@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro texture_info.resize(1); memory_manager.alloc("texture_info", texture_info); - fprintf(stderr, "Device init success\n"); device_initialized = true; } @@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea programs.push_back(&base_program); programs.push_back(&denoising_program); /* Call actual class to fill the vector with its programs. 
*/ - if(!load_kernels(requested_features, programs)) { + if(!add_kernel_programs(requested_features, programs)) { return false; } - /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks - * serialize the calls internally, so it's not much use right now. - * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor - * should be set to false as well. */ -#if 0 + /* Parallel compilation of Cycles kernels: this launches multiple + * processes to work around OpenCL frameworks serializing the calls + * internally within a single process. */ TaskPool task_pool; foreach(OpenCLProgram *program, programs) { task_pool.push(function_bind(&OpenCLProgram::load, program)); @@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return false; } } -#else - foreach(OpenCLProgram *program, programs) { - program->load(); - if(!program->is_loaded()) { - return false; - } - } -#endif return true; } diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 0a7bf96fed7..c0b9e81d4d3 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -35,19 +35,35 @@ public: OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_) : OpenCLDeviceBase(info, stats, profiler, background_), - path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ") + path_trace_program(this, + get_opencl_program_name(false, "megakernel"), + get_opencl_program_filename(false, "megakernel"), + "-D__COMPILE_ONLY_MEGAKERNEL__ ") { } - virtual bool show_samples() const { + + virtual bool show_samples() const + { return true; } - virtual BVHLayoutMask get_bvh_layout_mask() const { + virtual BVHLayoutMask get_bvh_layout_mask() const + { return BVH_LAYOUT_BVH2; } - virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/, + const string 
get_opencl_program_name(bool /*single_program*/, const string& kernel_name) + { + return kernel_name; + } + + const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/) + { + return "kernel.cl"; + } + + virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/, vector &programs) { path_trace_program.add_kernel(ustring("path_trace")); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 5a2555f9f80..b759f69d3ab 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -79,6 +79,27 @@ public: OpenCLProgram program_data_init; OpenCLProgram program_state_buffer_size; + OpenCLProgram program_split; + + OpenCLProgram program_path_init; + OpenCLProgram program_scene_intersect; + OpenCLProgram program_lamp_emission; + OpenCLProgram program_do_volume; + OpenCLProgram program_queue_enqueue; + OpenCLProgram program_indirect_background; + OpenCLProgram program_shader_setup; + OpenCLProgram program_shader_sort; + OpenCLProgram program_shader_eval; + OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; + OpenCLProgram program_subsurface_scatter; + OpenCLProgram program_direct_lighting; + OpenCLProgram program_shadow_blocked_ao; + OpenCLProgram program_shadow_blocked_dl; + OpenCLProgram program_enqueue_inactive; + OpenCLProgram program_next_iteration_setup; + OpenCLProgram program_indirect_subsurface; + OpenCLProgram program_buffer_update; + OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_); ~OpenCLDeviceSplitKernel() @@ -99,26 +120,150 @@ public: return BVH_LAYOUT_BVH2; } - virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features) + { + if (!OpenCLDeviceBase::load_kernels(requested_features)) { + return false; + } + return 
split_kernel->load_kernels(requested_features); + } + + const string fast_compiled_kernels = + "path_init " + "scene_intersect " + "queue_enqueue " + "shader_setup " + "shader_sort " + "enqueue_inactive " + "next_iteration_setup " + "indirect_subsurface " + "buffer_update"; + + const string get_opencl_program_name(bool single_program, const string& kernel_name) + { + if (single_program) { + return "split"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "split_bundle"; + } + else { + return "split_" + kernel_name; + } + } + } + + const string get_opencl_program_filename(bool single_program, const string& kernel_name) + { + if (single_program) { + return "kernel_split.cl"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "kernel_split_bundle.cl"; + } + else { + return "kernel_" + kernel_name + ".cl"; + } + } + } + + virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features, vector &programs) { bool single_program = OpenCLInfo::use_single_program(); - program_data_init = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_data_init", - single_program ? "kernel_split.cl" : "kernel_data_init.cl", - get_build_options(this, requested_features)); - + program_data_init = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "data_init"), + get_opencl_program_filename(single_program, "data_init"), + get_build_options(this, requested_features)); program_data_init.add_kernel(ustring("path_trace_data_init")); programs.push_back(&program_data_init); - program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_state_buffer_size", - single_program ? 
"kernel_split.cl" : "kernel_state_buffer_size.cl", - get_build_options(this, requested_features)); + program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "state_buffer_size"), + get_opencl_program_filename(single_program, "state_buffer_size"), + get_build_options(this, requested_features)); + program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); programs.push_back(&program_state_buffer_size); - return split_kernel->load_kernels(requested_features); + +#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); +#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \ + program_##kernel_name = \ + OpenCLDeviceBase::OpenCLProgram(this, \ + "split_"#kernel_name, \ + "kernel_"#kernel_name".cl", \ + get_build_options(this, requested_features)); \ + program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ + programs.push_back(&program_##kernel_name); + + if (single_program) { + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split" , + "kernel_split.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + 
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + + programs.push_back(&program_split); + } + else { + /* Ordered with most complex kernels first, to reduce overall compile time. */ + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); + + /* Quick kernels bundled in a single program to reduce overhead of starting + * Blender processes. */ + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split_bundle" , + "kernel_split_bundle.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + programs.push_back(&program_split); + } +#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM +#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM + + return true; } void thread_run(DeviceTask *task) @@ -281,8 +426,8 @@ public: bool single_program = OpenCLInfo::use_single_program(); kernel->program = OpenCLDeviceBase::OpenCLProgram(device, - single_program ? "split" : "split_" + kernel_name, - single_program ? 
"kernel_split.cl" : "kernel_" + kernel_name + ".cl", + device->get_opencl_program_name(single_program, kernel_name), + device->get_opencl_program_filename(single_program, kernel_name), get_build_options(device, requested_features)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index f43aa5f350a..fe5ba4886a9 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -17,12 +17,14 @@ #ifdef WITH_OPENCL #include "device/opencl/opencl.h" +#include "device/device_intern.h" #include "util/util_debug.h" #include "util/util_logging.h" #include "util/util_md5.h" #include "util/util_path.h" #include "util/util_time.h" +#include "util/util_system.h" using std::cerr; using std::endl; @@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src) } double starttime = time_dt(); - add_log(string("Compiling OpenCL program ") + program_name.c_str(), false); + add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); add_log(string("Build flags: ") + kernel_build_options, true); if(!build_kernel(debug_src)) return false; - add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false); + double elapsed = time_dt() - starttime; + add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); return true; } +bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin) +{ + vector args; + args.push_back("--background"); + args.push_back("--factory-startup"); + args.push_back("--python-expr"); + + args.push_back( + string_printf( + "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')", + (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? 
"true" : "false", + device->device_num, + device->device_name.c_str(), + device->platform_name.c_str(), + (device->kernel_build_options(NULL) + kernel_build_options).c_str(), + kernel_file.c_str(), + clbin.c_str())); + + double starttime = time_dt(); + add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); + add_log(string("Build flags: ") + kernel_build_options, true); + if(!system_call_self(args) || !path_exists(clbin)) { + return false; + } + + double elapsed = time_dt() - starttime; + add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); + + return load_binary(clbin); +} + +/* Compile opencl kernel. This method is called from the _cycles Python + * module compile kernels. Parameters must match function above. */ +bool device_opencl_compile_kernel(const vector& parameters) +{ + bool force_all_platforms = parameters[0] == "true"; + int device_platform_id = std::stoi(parameters[1]); + const string& device_name = parameters[2]; + const string& platform_name = parameters[3]; + const string& build_options = parameters[4]; + const string& kernel_file = parameters[5]; + const string& binary_path = parameters[6]; + + if(clewInit() != CLEW_SUCCESS) { + return false; + } + + vector usable_devices; + OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms); + if(device_platform_id >= usable_devices.size()) { + return false; + } + + OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id]; + if(platform_device.platform_name != platform_name || + platform_device.device_name != device_name) + { + return false; + } + + cl_platform_id platform = platform_device.platform_id; + cl_device_id device = platform_device.device_id; + const cl_context_properties context_props[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) platform, + 0, 0 + }; + + cl_int err; + cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err); + if(err != CL_SUCCESS) { 
+ return false; + } + + string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n"; + source = path_source_replace_includes(source, path_get("source")); + size_t source_len = source.size(); + const char *source_str = source.c_str(); + cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err); + bool result = false; + + if(err == CL_SUCCESS) { + err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL); + + if(err == CL_SUCCESS) { + size_t size = 0; + clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); + if(size > 0) { + vector binary(size); + uint8_t *bytes = &binary[0]; + clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL); + result = path_write_binary(binary_path, binary); + } + } + clReleaseProgram(program); + } + + clReleaseContext(context); + + return result; +} + bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin, const string *debug_src) { @@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load() } else { add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true); - - /* If does not exist or loading binary failed, compile kernel. */ - if(!compile_kernel(debug_src)) { - return; + if(!path_exists(clbin)) { + if(compile_separate(clbin)) { + add_log(string("Built and loaded program from ") + clbin + ".", true); + loaded = true; + } + else { + add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true); + + /* If does not exist or loading binary failed, compile kernel. */ + if(!compile_kernel(debug_src)) { + return; + } + + /* Save binary for reuse. */ + if(!save_binary(clbin)) { + add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); + } + } } - - /* Save binary for reuse. 
*/ - if(!save_binary(clbin)) { - add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); + else { + add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true); + /* Fall back to compiling. */ + if(!compile_kernel(debug_src)) { + return; + } } } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 163aacf19f9..f7041ee2783 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -39,6 +39,7 @@ set(SRC_OPENCL_KERNELS kernels/opencl/kernel.cl kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_split.cl + kernels/opencl/kernel_split_bundle.cl kernels/opencl/kernel_data_init.cl kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_queue_enqueue.cl diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl new file mode 100644 index 00000000000..71ea68382b4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/kernel_compat_opencl.h" // PRECOMPILED +#include "kernel/split/kernel_split_common.h" // PRECOMPILED + +#include "kernel/kernels/opencl/kernel_path_init.cl" +#include "kernel/kernels/opencl/kernel_scene_intersect.cl" +#include "kernel/kernels/opencl/kernel_queue_enqueue.cl" +#include "kernel/kernels/opencl/kernel_shader_setup.cl" +#include "kernel/kernels/opencl/kernel_shader_sort.cl" +#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" +#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" +#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" +#include "kernel/kernels/opencl/kernel_buffer_update.cl" diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index fc6db1f6662..a79829a3dd9 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -22,6 +22,9 @@ #include +#include +OIIO_NAMESPACE_USING + #ifdef _WIN32 # if(!defined(FREE_WINDOWS)) # include @@ -329,6 +332,25 @@ bool system_cpu_support_avx2() #endif +bool system_call_self(const vector& args) +{ + /* Escape program and arguments in case they contain spaces. */ + string cmd = "\"" + Sysutil::this_program_path() + "\""; + + for(int i = 0; i < args.size(); i++) { + cmd += " \"" + args[i] + "\""; + } + + /* Quiet output. */ +#ifdef _WIN32 + cmd += " > nul"; +#else + cmd += " > /dev/null"; +#endif + + return (system(cmd.c_str()) == 0); +} + size_t system_physical_ram() { #ifdef _WIN32 diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 1e7cf1d9f2a..2590b31a59d 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -18,6 +18,7 @@ #define __UTIL_SYSTEM_H__ #include "util/util_string.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -61,6 +62,9 @@ bool system_cpu_support_avx2(); size_t system_physical_ram(); +/* Start a new process of the current application with the given arguments. 
*/ +bool system_call_self(const vector& args); + CCL_NAMESPACE_END #endif /* __UTIL_SYSTEM_H__ */ -- cgit v1.2.3