Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2019-02-15 10:18:38 +0300
committerJeroen Bakker <j.bakker@atmind.nl>2019-02-15 10:49:25 +0300
commit4ce9785e01587638ae26256fe23315e436c658ff (patch)
tree338144aec436dc68a6fdd9240786988dd3f8705e /intern
parentd17a0270344101c4cd6e5026ea8239240868d24f (diff)
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working. So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function. Depends on D2231. Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97 Reviewed By: brecht Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli Differential Revision: https://developer.blender.org/D2264
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/CMakeLists.txt4
-rw-r--r--intern/cycles/blender/blender_python.cpp30
-rw-r--r--intern/cycles/device/device_intern.h1
-rw-r--r--intern/cycles/device/opencl/opencl.h20
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp20
-rw-r--r--intern/cycles/device/opencl/opencl_mega.cpp24
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp171
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp140
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl28
-rw-r--r--intern/cycles/util/util_system.cpp22
-rw-r--r--intern/cycles/util/util_system.h4
12 files changed, 420 insertions, 45 deletions
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 84e2690333e..f8720de366f 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -51,6 +51,10 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
+if(WITH_CYCLES_DEVICE_OPENCL)
+ add_definitions(-DWITH_OPENCL)
+endif()
+
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index de702337f98..a720a60c05b 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -40,6 +40,10 @@
#include <OSL/oslconfig.h>
#endif
+#ifdef WITH_OPENCL
+#include "device/device_intern.h"
+#endif
+
CCL_NAMESPACE_BEGIN
namespace {
@@ -628,6 +632,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
Py_RETURN_NONE;
}
+
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+ PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+ if(sequence == NULL) {
+ Py_RETURN_FALSE;
+ }
+
+ vector<string> parameters;
+ for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
+ PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
+ PyObject *item_as_string = PyObject_Str(item);
+ const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
+ parameters.push_back(parameter_string);
+ Py_DECREF(item_as_string);
+ }
+ Py_DECREF(sequence);
+
+ if (device_opencl_compile_kernel(parameters)) {
+ Py_RETURN_TRUE;
+ }
+ else {
+ Py_RETURN_FALSE;
+ }
+}
#endif
static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
@@ -903,6 +932,7 @@ static PyMethodDef methods[] = {
{"system_info", system_info_func, METH_NOARGS, ""},
#ifdef WITH_OPENCL
{"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+ {"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
#endif
/* Standalone denoising */
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 0b26057c3ba..94df1e009eb 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -24,6 +24,7 @@ class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 9b763167459..a2c0e53b3e7 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -268,6 +268,7 @@ public:
cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_int ciErr;
+ int device_num;
class OpenCLProgram {
public:
@@ -293,7 +294,15 @@ public:
private:
bool build_kernel(const string *debug_src);
+ /* Build the program by launching a separate instance of the current executable.
+ * This is required for multithreaded OpenCL compilation, since most frameworks serialize
+ * build calls internally if they come from the same process.
+ * If that is not supported, this function just returns false.
+ */
+ bool compile_separate(const string& clbin);
+ /* Build the program by calling OpenCL directly. */
bool compile_kernel(const string *debug_src);
+ /* Loading and saving the program from/to disk. */
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
@@ -342,12 +351,17 @@ public:
bool opencl_version_check();
string device_md5_hash(string kernel_custom_build_options = "");
- bool load_kernels(const DeviceRequestedFeatures& requested_features);
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
/* Has to be implemented by the real device classes.
* The base device will then load all these programs. */
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
- vector<OpenCLProgram*> &programs) = 0;
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs) = 0;
+
+ /* Get the name of the opencl program for the given kernel */
+ virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+ /* Get the program file name to compile (*.cl) for the given kernel */
+ virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
void mem_alloc(device_memory& mem);
void mem_copy_to(device_memory& mem);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 4417065bb7f..d8f9a242ac8 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
}
assert(info.num < usable_devices.size());
OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+ device_num = info.num;
cpPlatform = platform_device.platform_id;
cdDevice = platform_device.device_id;
platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
texture_info.resize(1);
memory_manager.alloc("texture_info", texture_info);
- fprintf(stderr, "Device init success\n");
device_initialized = true;
}
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
programs.push_back(&base_program);
programs.push_back(&denoising_program);
/* Call actual class to fill the vector with its programs. */
- if(!load_kernels(requested_features, programs)) {
+ if(!add_kernel_programs(requested_features, programs)) {
return false;
}
- /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
- * serialize the calls internally, so it's not much use right now.
- * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
- * should be set to false as well. */
-#if 0
+ /* Parallel compilation of Cycles kernels: this launches multiple
+ * processes to work around OpenCL frameworks serializing the calls
+ * internally within a single process. */
TaskPool task_pool;
foreach(OpenCLProgram *program, programs) {
task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
return false;
}
}
-#else
- foreach(OpenCLProgram *program, programs) {
- program->load();
- if(!program->is_loaded()) {
- return false;
- }
- }
-#endif
return true;
}
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 0a7bf96fed7..c0b9e81d4d3 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -35,19 +35,35 @@ public:
OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
: OpenCLDeviceBase(info, stats, profiler, background_),
- path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+ path_trace_program(this,
+ get_opencl_program_name(false, "megakernel"),
+ get_opencl_program_filename(false, "megakernel"),
+ "-D__COMPILE_ONLY_MEGAKERNEL__ ")
{
}
- virtual bool show_samples() const {
+
+ virtual bool show_samples() const
+ {
return true;
}
- virtual BVHLayoutMask get_bvh_layout_mask() const {
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+ const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+ {
+ return kernel_name;
+ }
+
+ const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+ {
+ return "kernel.cl";
+ }
+
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 5a2555f9f80..b759f69d3ab 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -79,6 +79,27 @@ public:
OpenCLProgram program_data_init;
OpenCLProgram program_state_buffer_size;
+ OpenCLProgram program_split;
+
+ OpenCLProgram program_path_init;
+ OpenCLProgram program_scene_intersect;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_queue_enqueue;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_setup;
+ OpenCLProgram program_shader_sort;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+ OpenCLProgram program_enqueue_inactive;
+ OpenCLProgram program_next_iteration_setup;
+ OpenCLProgram program_indirect_subsurface;
+ OpenCLProgram program_buffer_update;
+
OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features)
+ {
+ if (!OpenCLDeviceBase::load_kernels(requested_features)) {
+ return false;
+ }
+ return split_kernel->load_kernels(requested_features);
+ }
+
+ const string fast_compiled_kernels =
+ "path_init "
+ "scene_intersect "
+ "queue_enqueue "
+ "shader_setup "
+ "shader_sort "
+ "enqueue_inactive "
+ "next_iteration_setup "
+ "indirect_subsurface "
+ "buffer_update";
+
+ const string get_opencl_program_name(bool single_program, const string& kernel_name)
+ {
+ if (single_program) {
+ return "split";
+ }
+ else {
+ if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ return "split_bundle";
+ }
+ else {
+ return "split_" + kernel_name;
+ }
+ }
+ }
+
+ const string get_opencl_program_filename(bool single_program, const string& kernel_name)
+ {
+ if (single_program) {
+ return "kernel_split.cl";
+ }
+ else {
+ if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ return "kernel_split_bundle.cl";
+ }
+ else {
+ return "kernel_" + kernel_name + ".cl";
+ }
+ }
+ }
+
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
{
bool single_program = OpenCLInfo::use_single_program();
- program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_data_init",
- single_program ? "kernel_split.cl" : "kernel_data_init.cl",
- get_build_options(this, requested_features));
-
+ program_data_init = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "data_init"),
+ get_opencl_program_filename(single_program, "data_init"),
+ get_build_options(this, requested_features));
program_data_init.add_kernel(ustring("path_trace_data_init"));
programs.push_back(&program_data_init);
- program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_state_buffer_size",
- single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl",
- get_build_options(this, requested_features));
+ program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "state_buffer_size"),
+ get_opencl_program_filename(single_program, "state_buffer_size"),
+ get_build_options(this, requested_features));
+
program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
programs.push_back(&program_state_buffer_size);
- return split_kernel->load_kernels(requested_features);
+
+#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
+ program_##kernel_name = \
+ OpenCLDeviceBase::OpenCLProgram(this, \
+ "split_"#kernel_name, \
+ "kernel_"#kernel_name".cl", \
+ get_build_options(this, requested_features)); \
+ program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+ programs.push_back(&program_##kernel_name);
+
+ if (single_program) {
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split" ,
+ "kernel_split.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+
+ programs.push_back(&program_split);
+ }
+ else {
+ /* Ordered with most complex kernels first, to reduce overall compile time. */
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval);
+
+ /* Quick kernels bundled in a single program to reduce overhead of starting
+ * Blender processes. */
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split_bundle" ,
+ "kernel_split_bundle.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+ programs.push_back(&program_split);
+ }
+#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM
+#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM
+
+ return true;
}
void thread_run(DeviceTask *task)
@@ -281,8 +426,8 @@ public:
bool single_program = OpenCLInfo::use_single_program();
kernel->program =
OpenCLDeviceBase::OpenCLProgram(device,
- single_program ? "split" : "split_" + kernel_name,
- single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl",
+ device->get_opencl_program_name(single_program, kernel_name),
+ device->get_opencl_program_filename(single_program, kernel_name),
get_build_options(device, requested_features));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index f43aa5f350a..fe5ba4886a9 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -17,12 +17,14 @@
#ifdef WITH_OPENCL
#include "device/opencl/opencl.h"
+#include "device/device_intern.h"
#include "util/util_debug.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_path.h"
#include "util/util_time.h"
+#include "util/util_system.h"
using std::cerr;
using std::endl;
@@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
}
double starttime = time_dt();
- add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
+ add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
add_log(string("Build flags: ") + kernel_build_options, true);
if(!build_kernel(debug_src))
return false;
- add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
+ double elapsed = time_dt() - starttime;
+ add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
return true;
}
+bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin)
+{
+ vector<string> args;
+ args.push_back("--background");
+ args.push_back("--factory-startup");
+ args.push_back("--python-expr");
+
+ args.push_back(
+ string_printf(
+ "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')",
+ (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false",
+ device->device_num,
+ device->device_name.c_str(),
+ device->platform_name.c_str(),
+ (device->kernel_build_options(NULL) + kernel_build_options).c_str(),
+ kernel_file.c_str(),
+ clbin.c_str()));
+
+ double starttime = time_dt();
+ add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
+ add_log(string("Build flags: ") + kernel_build_options, true);
+ if(!system_call_self(args) || !path_exists(clbin)) {
+ return false;
+ }
+
+ double elapsed = time_dt() - starttime;
+ add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
+
+ return load_binary(clbin);
+}
+
+/* Compile an OpenCL kernel. This function is called from the _cycles Python
+ * module to compile kernels. Parameters must match the function above. */
+bool device_opencl_compile_kernel(const vector<string>& parameters)
+{
+ bool force_all_platforms = parameters[0] == "true";
+ int device_platform_id = std::stoi(parameters[1]);
+ const string& device_name = parameters[2];
+ const string& platform_name = parameters[3];
+ const string& build_options = parameters[4];
+ const string& kernel_file = parameters[5];
+ const string& binary_path = parameters[6];
+
+ if(clewInit() != CLEW_SUCCESS) {
+ return false;
+ }
+
+ vector<OpenCLPlatformDevice> usable_devices;
+ OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms);
+ if(device_platform_id >= usable_devices.size()) {
+ return false;
+ }
+
+ OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
+ if(platform_device.platform_name != platform_name ||
+ platform_device.device_name != device_name)
+ {
+ return false;
+ }
+
+ cl_platform_id platform = platform_device.platform_id;
+ cl_device_id device = platform_device.device_id;
+ const cl_context_properties context_props[] = {
+ CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
+ 0, 0
+ };
+
+ cl_int err;
+ cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
+ if(err != CL_SUCCESS) {
+ return false;
+ }
+
+ string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
+ source = path_source_replace_includes(source, path_get("source"));
+ size_t source_len = source.size();
+ const char *source_str = source.c_str();
+ cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
+ bool result = false;
+
+ if(err == CL_SUCCESS) {
+ err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
+
+ if(err == CL_SUCCESS) {
+ size_t size = 0;
+ clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+ if(size > 0) {
+ vector<uint8_t> binary(size);
+ uint8_t *bytes = &binary[0];
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+ result = path_write_binary(binary_path, binary);
+ }
+ }
+ clReleaseProgram(program);
+ }
+
+ clReleaseContext(context);
+
+ return result;
+}
+
bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
const string *debug_src)
{
@@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load()
}
else {
add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-
- /* If does not exist or loading binary failed, compile kernel. */
- if(!compile_kernel(debug_src)) {
- return;
+ if(!path_exists(clbin)) {
+ if(compile_separate(clbin)) {
+ add_log(string("Built and loaded program from ") + clbin + ".", true);
+ loaded = true;
+ }
+ else {
+ add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+ /* If does not exist or loading binary failed, compile kernel. */
+ if(!compile_kernel(debug_src)) {
+ return;
+ }
+
+ /* Save binary for reuse. */
+ if(!save_binary(clbin)) {
+ add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+ }
+ }
}
-
- /* Save binary for reuse. */
- if(!save_binary(clbin)) {
- add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+ else {
+ add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
+ /* Fall back to compiling. */
+ if(!compile_kernel(debug_src)) {
+ return;
+ }
}
}
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 163aacf19f9..f7041ee2783 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -39,6 +39,7 @@ set(SRC_OPENCL_KERNELS
kernels/opencl/kernel.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
+ kernels/opencl/kernel_split_bundle.cl
kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
new file mode 100644
index 00000000000..71ea68382b4
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h" // PRECOMPILED
+#include "kernel/split/kernel_split_common.h" // PRECOMPILED
+
+#include "kernel/kernels/opencl/kernel_path_init.cl"
+#include "kernel/kernels/opencl/kernel_scene_intersect.cl"
+#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
+#include "kernel/kernels/opencl/kernel_shader_setup.cl"
+#include "kernel/kernels/opencl/kernel_shader_sort.cl"
+#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
+#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
+#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
+#include "kernel/kernels/opencl/kernel_buffer_update.cl"
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index fc6db1f6662..a79829a3dd9 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -22,6 +22,9 @@
#include <numaapi.h>
+#include <OpenImageIO/sysutil.h>
+OIIO_NAMESPACE_USING
+
#ifdef _WIN32
# if(!defined(FREE_WINDOWS))
# include <intrin.h>
@@ -329,6 +332,25 @@ bool system_cpu_support_avx2()
#endif
+bool system_call_self(const vector<string>& args)
+{
+ /* Escape program and arguments in case they contain spaces. */
+ string cmd = "\"" + Sysutil::this_program_path() + "\"";
+
+ for(int i = 0; i < args.size(); i++) {
+ cmd += " \"" + args[i] + "\"";
+ }
+
+ /* Quiet output. */
+#ifdef _WIN32
+ cmd += " > nul";
+#else
+ cmd += " > /dev/null";
+#endif
+
+ return (system(cmd.c_str()) == 0);
+}
+
size_t system_physical_ram()
{
#ifdef _WIN32
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 1e7cf1d9f2a..2590b31a59d 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -18,6 +18,7 @@
#define __UTIL_SYSTEM_H__
#include "util/util_string.h"
+#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -61,6 +62,9 @@ bool system_cpu_support_avx2();
size_t system_physical_ram();
+/* Start a new process of the current application with the given arguments. */
+bool system_call_self(const vector<string>& args);
+
CCL_NAMESPACE_END
#endif /* __UTIL_SYSTEM_H__ */