diff options
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_intern.h | 1 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 20 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 20 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_mega.cpp | 24 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 171 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_util.cpp | 140 |
6 files changed, 331 insertions, 45 deletions
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h index 0b26057c3ba..94df1e009eb 100644 --- a/intern/cycles/device/device_intern.h +++ b/intern/cycles/device/device_intern.h @@ -24,6 +24,7 @@ class Device; Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); bool device_opencl_init(); Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); +bool device_opencl_compile_kernel(const vector<string>& parameters); bool device_cuda_init(); Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address); diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 9b763167459..a2c0e53b3e7 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -268,6 +268,7 @@ public: cl_platform_id cpPlatform; cl_device_id cdDevice; cl_int ciErr; + int device_num; class OpenCLProgram { public: @@ -293,7 +294,15 @@ public: private: bool build_kernel(const string *debug_src); + /* Build the program by invoking our own executable as a separate process. + * This is required for multithreaded OpenCL compilation, since most frameworks serialize + * build calls internally if they come from the same process. + * If that is not supported, this function just returns false. + */ + bool compile_separate(const string& clbin); + /* Build the program by calling OpenCL directly. */ bool compile_kernel(const string *debug_src); + /* Loading and saving the program from/to disk. 
*/ bool load_binary(const string& clbin, const string *debug_src = NULL); bool save_binary(const string& clbin); @@ -342,12 +351,17 @@ public: bool opencl_version_check(); string device_md5_hash(string kernel_custom_build_options = ""); - bool load_kernels(const DeviceRequestedFeatures& requested_features); + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features); /* Has to be implemented by the real device classes. * The base device will then load all these programs. */ - virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, - vector<OpenCLProgram*> &programs) = 0; + virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features, + vector<OpenCLProgram*> &programs) = 0; + + /* Get the name of the opencl program for the given kernel */ + virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0; + /* Get the program file name to compile (*.cl) for the given kernel */ + virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0; void mem_alloc(device_memory& mem); void mem_copy_to(device_memory& mem); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 4417065bb7f..d8f9a242ac8 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro } assert(info.num < usable_devices.size()); OpenCLPlatformDevice& platform_device = usable_devices[info.num]; + device_num = info.num; cpPlatform = platform_device.platform_id; cdDevice = platform_device.device_id; platform_name = platform_device.platform_name; @@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro texture_info.resize(1); memory_manager.alloc("texture_info", texture_info); - fprintf(stderr, "Device init success\n"); 
device_initialized = true; } @@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea programs.push_back(&base_program); programs.push_back(&denoising_program); /* Call actual class to fill the vector with its programs. */ - if(!load_kernels(requested_features, programs)) { + if(!add_kernel_programs(requested_features, programs)) { return false; } - /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks - * serialize the calls internally, so it's not much use right now. - * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor - * should be set to false as well. */ -#if 0 + /* Parallel compilation of Cycles kernels: this launches multiple + * processes to work around OpenCL frameworks serializing the calls + * internally within a single process. */ TaskPool task_pool; foreach(OpenCLProgram *program, programs) { task_pool.push(function_bind(&OpenCLProgram::load, program)); @@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return false; } } -#else - foreach(OpenCLProgram *program, programs) { - program->load(); - if(!program->is_loaded()) { - return false; - } - } -#endif return true; } diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 0a7bf96fed7..c0b9e81d4d3 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -35,19 +35,35 @@ public: OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_) : OpenCLDeviceBase(info, stats, profiler, background_), - path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ") + path_trace_program(this, + get_opencl_program_name(false, "megakernel"), + get_opencl_program_filename(false, "megakernel"), + "-D__COMPILE_ONLY_MEGAKERNEL__ ") { } - virtual bool show_samples() const { + + virtual bool 
show_samples() const + { return true; } - virtual BVHLayoutMask get_bvh_layout_mask() const { + virtual BVHLayoutMask get_bvh_layout_mask() const + { return BVH_LAYOUT_BVH2; } - virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/, + const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name) + { + return kernel_name; + } + + const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/) + { + return "kernel.cl"; + } + + virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/, vector<OpenCLProgram*> &programs) { path_trace_program.add_kernel(ustring("path_trace")); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 5a2555f9f80..b759f69d3ab 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -79,6 +79,27 @@ public: OpenCLProgram program_data_init; OpenCLProgram program_state_buffer_size; + OpenCLProgram program_split; + + OpenCLProgram program_path_init; + OpenCLProgram program_scene_intersect; + OpenCLProgram program_lamp_emission; + OpenCLProgram program_do_volume; + OpenCLProgram program_queue_enqueue; + OpenCLProgram program_indirect_background; + OpenCLProgram program_shader_setup; + OpenCLProgram program_shader_sort; + OpenCLProgram program_shader_eval; + OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; + OpenCLProgram program_subsurface_scatter; + OpenCLProgram program_direct_lighting; + OpenCLProgram program_shadow_blocked_ao; + OpenCLProgram program_shadow_blocked_dl; + OpenCLProgram program_enqueue_inactive; + OpenCLProgram program_next_iteration_setup; + OpenCLProgram program_indirect_subsurface; + OpenCLProgram program_buffer_update; + OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_); ~OpenCLDeviceSplitKernel() @@ -99,26 +120,150 @@ public: return 
BVH_LAYOUT_BVH2; } - virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features) + { + if (!OpenCLDeviceBase::load_kernels(requested_features)) { + return false; + } + return split_kernel->load_kernels(requested_features); + } + + const string fast_compiled_kernels = + "path_init " + "scene_intersect " + "queue_enqueue " + "shader_setup " + "shader_sort " + "enqueue_inactive " + "next_iteration_setup " + "indirect_subsurface " + "buffer_update"; + + const string get_opencl_program_name(bool single_program, const string& kernel_name) + { + if (single_program) { + return "split"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "split_bundle"; + } + else { + return "split_" + kernel_name; + } + } + } + + const string get_opencl_program_filename(bool single_program, const string& kernel_name) + { + if (single_program) { + return "kernel_split.cl"; + } + else { + if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { + return "kernel_split_bundle.cl"; + } + else { + return "kernel_" + kernel_name + ".cl"; + } + } + } + + virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features, vector<OpenCLDeviceBase::OpenCLProgram*> &programs) { bool single_program = OpenCLInfo::use_single_program(); - program_data_init = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_data_init", - single_program ? 
"kernel_split.cl" : "kernel_data_init.cl", - get_build_options(this, requested_features)); - + program_data_init = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "data_init"), + get_opencl_program_filename(single_program, "data_init"), + get_build_options(this, requested_features)); program_data_init.add_kernel(ustring("path_trace_data_init")); programs.push_back(&program_data_init); - program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this, - single_program ? "split" : "split_state_buffer_size", - single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl", - get_build_options(this, requested_features)); + program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram( + this, + get_opencl_program_name(single_program, "state_buffer_size"), + get_opencl_program_filename(single_program, "state_buffer_size"), + get_build_options(this, requested_features)); + program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); programs.push_back(&program_state_buffer_size); - return split_kernel->load_kernels(requested_features); + +#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); +#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \ + program_##kernel_name = \ + OpenCLDeviceBase::OpenCLProgram(this, \ + "split_"#kernel_name, \ + "kernel_"#kernel_name".cl", \ + get_build_options(this, requested_features)); \ + program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ + programs.push_back(&program_##kernel_name); + + if (single_program) { + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split" , + "kernel_split.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + 
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + + programs.push_back(&program_split); + } + else { + /* Ordered with most complex kernels first, to reduce overall compile time. */ + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); + + /* Quick kernels bundled in a single program to reduce overhead of starting + * Blender processes. 
*/ + program_split = OpenCLDeviceBase::OpenCLProgram( + this, + "split_bundle" , + "kernel_split_bundle.cl", + get_build_options(this, requested_features)); + + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); + programs.push_back(&program_split); + } +#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM +#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM + + return true; } void thread_run(DeviceTask *task) @@ -281,8 +426,8 @@ public: bool single_program = OpenCLInfo::use_single_program(); kernel->program = OpenCLDeviceBase::OpenCLProgram(device, - single_program ? "split" : "split_" + kernel_name, - single_program ? 
"kernel_split.cl" : "kernel_" + kernel_name + ".cl", + device->get_opencl_program_name(single_program, kernel_name), + device->get_opencl_program_filename(single_program, kernel_name), get_build_options(device, requested_features)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index f43aa5f350a..fe5ba4886a9 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -17,12 +17,14 @@ #ifdef WITH_OPENCL #include "device/opencl/opencl.h" +#include "device/device_intern.h" #include "util/util_debug.h" #include "util/util_logging.h" #include "util/util_md5.h" #include "util/util_path.h" #include "util/util_time.h" +#include "util/util_system.h" using std::cerr; using std::endl; @@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src) } double starttime = time_dt(); - add_log(string("Compiling OpenCL program ") + program_name.c_str(), false); + add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); add_log(string("Build flags: ") + kernel_build_options, true); if(!build_kernel(debug_src)) return false; - add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false); + double elapsed = time_dt() - starttime; + add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); return true; } +bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin) +{ + vector<string> args; + args.push_back("--background"); + args.push_back("--factory-startup"); + args.push_back("--python-expr"); + + args.push_back( + string_printf( + "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')", + (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? 
"true" : "false", + device->device_num, + device->device_name.c_str(), + device->platform_name.c_str(), + (device->kernel_build_options(NULL) + kernel_build_options).c_str(), + kernel_file.c_str(), + clbin.c_str())); + + double starttime = time_dt(); + add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); + add_log(string("Build flags: ") + kernel_build_options, true); + if(!system_call_self(args) || !path_exists(clbin)) { + return false; + } + + double elapsed = time_dt() - starttime; + add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); + + return load_binary(clbin); +} + +/* Compile an OpenCL kernel. This function is called from the _cycles Python + * module to compile kernels. Parameters must match the function above. */ +bool device_opencl_compile_kernel(const vector<string>& parameters) +{ + bool force_all_platforms = parameters[0] == "true"; + int device_platform_id = std::stoi(parameters[1]); + const string& device_name = parameters[2]; + const string& platform_name = parameters[3]; + const string& build_options = parameters[4]; + const string& kernel_file = parameters[5]; + const string& binary_path = parameters[6]; + + if(clewInit() != CLEW_SUCCESS) { + return false; + } + + vector<OpenCLPlatformDevice> usable_devices; + OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms); + if(device_platform_id >= usable_devices.size()) { + return false; + } + + OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id]; + if(platform_device.platform_name != platform_name || + platform_device.device_name != device_name) + { + return false; + } + + cl_platform_id platform = platform_device.platform_id; + cl_device_id device = platform_device.device_id; + const cl_context_properties context_props[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) platform, + 0, 0 + }; + + cl_int err; + cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, 
&err); + if(err != CL_SUCCESS) { + return false; + } + + string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n"; + source = path_source_replace_includes(source, path_get("source")); + size_t source_len = source.size(); + const char *source_str = source.c_str(); + cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err); + bool result = false; + + if(err == CL_SUCCESS) { + err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL); + + if(err == CL_SUCCESS) { + size_t size = 0; + clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); + if(size > 0) { + vector<uint8_t> binary(size); + uint8_t *bytes = &binary[0]; + clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL); + result = path_write_binary(binary_path, binary); + } + } + clReleaseProgram(program); + } + + clReleaseContext(context); + + return result; +} + bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin, const string *debug_src) { @@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load() } else { add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true); - - /* If does not exist or loading binary failed, compile kernel. */ - if(!compile_kernel(debug_src)) { - return; + if(!path_exists(clbin)) { + if(compile_separate(clbin)) { + add_log(string("Built and loaded program from ") + clbin + ".", true); + loaded = true; + } + else { + add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true); + + /* If does not exist or loading binary failed, compile kernel. */ + if(!compile_kernel(debug_src)) { + return; + } + + /* Save binary for reuse. */ + if(!save_binary(clbin)) { + add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); + } + } } - - /* Save binary for reuse. 
*/ - if(!save_binary(clbin)) { - add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); + else { + add_log(string("Kernel file ") + clbin + " exists, but failed to be loaded by driver.", true); + /* Fall back to compiling. */ + if(!compile_kernel(debug_src)) { + return; + } } } |