diff options
-rw-r--r-- | intern/cycles/blender/blender_session.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/blender/blender_session.h | 1 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 22 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 31 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 76 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 303 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_util.cpp | 144 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 90 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 56 | ||||
-rw-r--r-- | intern/cycles/render/session.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_progress.h | 23 | ||||
-rw-r--r-- | intern/cycles/util/util_task.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/util/util_task.h | 1 |
13 files changed, 560 insertions, 205 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index ab08b9e146d..27541800804 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -933,6 +933,11 @@ void BlenderSession::get_status(string& status, string& substatus) session->progress.get_status(status, substatus); } +void BlenderSession::get_kernel_status(string& kernel_status) +{ + session->progress.get_kernel_status(kernel_status); +} + void BlenderSession::get_progress(float& progress, double& total_time, double& render_time) { session->progress.get_time(total_time, render_time); @@ -951,7 +956,7 @@ void BlenderSession::update_bake_progress() void BlenderSession::update_status_progress() { - string timestatus, status, substatus; + string timestatus, status, substatus, kernel_status; string scene = ""; float progress; double total_time, remaining_time = 0, render_time; @@ -960,6 +965,7 @@ void BlenderSession::update_status_progress() float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f; get_status(status, substatus); + get_kernel_status(kernel_status); get_progress(progress, total_time, render_time); if(progress > 0) @@ -989,6 +995,8 @@ void BlenderSession::update_status_progress() status = " | " + status; if(substatus.size() > 0) status += " | " + substatus; + if(kernel_status.size() > 0) + status += " | " + kernel_status; double current_time = time_dt(); /* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date. 
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index 2aa3c77c37d..2c0a83cf6e7 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -90,6 +90,7 @@ public: void tag_redraw(); void tag_update(); void get_status(string& status, string& substatus); + void get_kernel_status(string& kernel_status); void get_progress(float& progress, double& total_time, double& render_time); void test_cancel(); void update_status_progress(); diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 3bf978600d5..6f3208e955f 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -56,6 +56,14 @@ enum DeviceTypeMask { DEVICE_MASK_ALL = ~0 }; +enum DeviceKernelStatus { + DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0, + DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE, + DEVICE_KERNEL_USING_FEATURE_KERNEL, + DEVICE_KERNEL_FEATURE_KERNEL_INVALID, + DEVICE_KERNEL_UNKNOWN, +}; + #define DEVICE_MASK(type) (DeviceTypeMask)(1 << type) class DeviceInfo { @@ -321,6 +329,20 @@ public: const DeviceRequestedFeatures& /*requested_features*/) { return true; } + /* Wait for device to become available to upload data and receive tasks + * This method is used by the OpenCL device to load the + * optimized kernels or when not (yet) available load the + * generic kernels (only during foreground rendering) */ + virtual bool wait_for_availability( + const DeviceRequestedFeatures& /*requested_features*/) + { return true; } + /* Check if there are 'better' kernels available to be used + * We can switch over to these kernels + * This method is used to determine if we can switch the preview kernels + * to regular kernels */ + virtual DeviceKernelStatus get_active_kernel_switch_state() + { return DEVICE_KERNEL_USING_FEATURE_KERNEL; } + /* tasks */ virtual int get_split_task_count(DeviceTask& task) = 0; virtual void task_add(DeviceTask& task) = 0; diff --git 
a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 2fac4fa071b..516b86654aa 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -120,6 +120,37 @@ public: return true; } + bool wait_for_availability(const DeviceRequestedFeatures& requested_features) + { + foreach(SubDevice& sub, devices) + if(!sub.device->wait_for_availability(requested_features)) + return false; + + return true; + } + + DeviceKernelStatus get_active_kernel_switch_state() + { + DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL; + + foreach(SubDevice& sub, devices) { + DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state(); + switch (subresult) { + case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL: + result = subresult; + break; + + case DEVICE_KERNEL_FEATURE_KERNEL_INVALID: + case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE: + return subresult; + + case DEVICE_KERNEL_USING_FEATURE_KERNEL: + break; + } + } + return result; + } + void mem_alloc(device_memory& mem) { device_ptr key = unique_key++; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 2a4e07419ac..bb507be4c72 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -261,16 +261,22 @@ class OpenCLDevice : public Device { public: DedicatedTaskPool task_pool; + + /* Task pool for required kernels (base, AO kernels during foreground rendering) */ + TaskPool load_required_kernel_task_pool; + /* Task pool for optional kernels (feature kernels during foreground rendering) */ + TaskPool load_kernel_task_pool; cl_context cxContext; cl_command_queue cqCommandQueue; cl_platform_id cpPlatform; cl_device_id cdDevice; cl_int ciErr; int device_num; + bool use_preview_kernels; class OpenCLProgram { public: - OpenCLProgram() : loaded(false), program(NULL), device(NULL) {} + OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {} 
OpenCLProgram(OpenCLDevice *device, const string& program_name, const string& kernel_name, @@ -279,12 +285,24 @@ public: ~OpenCLProgram(); void add_kernel(ustring name); - void load(); + + /* Try to load the program from device cache or disk */ + bool load(); + /* Compile the kernel (first separate, fall back to local) */ + void compile(); + /* Create the OpenCL kernels after loading or compiling */ + void create_kernels(); bool is_loaded() const { return loaded; } const string& get_log() const { return log; } void report_error(); + /* Wait until this kernel is available to be used + * It will return true when the kernel is available. + * It will return false when the kernel is not available + * or could not be loaded. */ + bool wait_for_availability(); + cl_kernel operator()(); cl_kernel operator()(ustring name); @@ -308,6 +326,8 @@ public: void add_error(const string& msg); bool loaded; + bool needs_compiling; + cl_program program; OpenCLDevice *device; @@ -323,19 +343,32 @@ public: map<ustring, cl_kernel> kernels; }; - DeviceSplitKernel *split_kernel; - - OpenCLProgram program_split; + /* Container for all types of split programs. */ + class OpenCLSplitPrograms { + public: + OpenCLDevice *device; + OpenCLProgram program_split; + OpenCLProgram program_lamp_emission; + OpenCLProgram program_do_volume; + OpenCLProgram program_indirect_background; + OpenCLProgram program_shader_eval; + OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; + OpenCLProgram program_subsurface_scatter; + OpenCLProgram program_direct_lighting; + OpenCLProgram program_shadow_blocked_ao; + OpenCLProgram program_shadow_blocked_dl; + + OpenCLSplitPrograms(OpenCLDevice *device); + ~OpenCLSplitPrograms(); + + /* Load the kernels and put the created kernels in the given `programs` + * parameter. 
*/ + void load_kernels(vector<OpenCLProgram*> &programs, + const DeviceRequestedFeatures& requested_features, + bool is_preview=false); + }; - OpenCLProgram program_lamp_emission; - OpenCLProgram program_do_volume; - OpenCLProgram program_indirect_background; - OpenCLProgram program_shader_eval; - OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; - OpenCLProgram program_subsurface_scatter; - OpenCLProgram program_direct_lighting; - OpenCLProgram program_shadow_blocked_ao; - OpenCLProgram program_shadow_blocked_dl; + DeviceSplitKernel *split_kernel; OpenCLProgram base_program; OpenCLProgram bake_program; @@ -343,6 +376,9 @@ public: OpenCLProgram background_program; OpenCLProgram denoising_program; + OpenCLSplitPrograms kernel_programs; + OpenCLSplitPrograms preview_programs; + typedef map<string, device_vector<uchar>*> ConstMemMap; typedef map<string, device_ptr> MemMap; @@ -358,22 +394,30 @@ public: void opencl_error(const string& message); void opencl_assert_err(cl_int err, const char* where); - OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_); + OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); ~OpenCLDevice(); static void CL_CALLBACK context_notify_callback(const char *err_info, const void * /*private_info*/, size_t /*cb*/, void *user_data); bool opencl_version_check(); + OpenCLSplitPrograms* get_split_programs(); string device_md5_hash(string kernel_custom_build_options = ""); bool load_kernels(const DeviceRequestedFeatures& requested_features); + void load_required_kernels(const DeviceRequestedFeatures& requested_features); + void load_preview_kernels(); + + bool wait_for_availability(const DeviceRequestedFeatures& requested_features); + DeviceKernelStatus get_active_kernel_switch_state(); /* Get the name of the opencl program for the given kernel */ const string get_opencl_program_name(const string& kernel_name); /* Get the program file name to compile (*.cl) for the given 
kernel */ const string get_opencl_program_filename(const string& kernel_name); - string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name); + string get_build_options(const DeviceRequestedFeatures& requested_features, + const string& opencl_program_name, + bool preview_kernel=false); /* Enable the default features to reduce recompilation events */ void enable_default_features(DeviceRequestedFeatures& features); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 57612098b34..555707cecd5 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -104,7 +104,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features) } } -string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name) +string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name, bool preview_kernel) { /* first check for non-split kernel programs */ if (opencl_program_name == "base" || opencl_program_name == "denoising") { @@ -181,7 +181,13 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_ enable_default_features(nofeatures); /* Add program specific optimized compile directives */ - if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) { + if (preview_kernel) { + DeviceRequestedFeatures preview_features; + preview_features.use_hair = true; + build_options += "-D__KERNEL_OPENCL_PREVIEW__ "; + build_options += preview_features.get_build_options(); + } + else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) { build_options += nofeatures.get_build_options(); } else { @@ -208,6 +214,77 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_ return build_options; } 
+OpenCLDevice::OpenCLSplitPrograms::OpenCLSplitPrograms(OpenCLDevice *device_) +{ + device = device_; +} + +OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms() +{ + program_split.release(); + program_lamp_emission.release(); + program_do_volume.release(); + program_indirect_background.release(); + program_shader_eval.release(); + program_holdout_emission_blurring_pathtermination_ao.release(); + program_subsurface_scatter.release(); + program_direct_lighting.release(); + program_shadow_blocked_ao.release(); + program_shadow_blocked_dl.release(); +} + +void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &programs, const DeviceRequestedFeatures& requested_features, bool is_preview) +{ + if (!requested_features.use_baking) { +#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); +#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \ + const string program_name_##kernel_name = "split_"#kernel_name; \ + program_##kernel_name = \ + OpenCLDevice::OpenCLProgram(device, \ + program_name_##kernel_name, \ + "kernel_"#kernel_name".cl", \ + device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \ + program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ + programs.push_back(&program_##kernel_name); + + /* Ordered with most complex kernels first, to reduce overall compile time. 
*/ + ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter); + if (requested_features.use_volume || is_preview) { + ADD_SPLIT_KERNEL_PROGRAM(do_volume); + } + ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl); + ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao); + ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); + ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_PROGRAM(indirect_background); + ADD_SPLIT_KERNEL_PROGRAM(shader_eval); + + /* Quick kernels bundled in a single program to reduce overhead of starting + * Blender processes. */ + program_split = OpenCLDevice::OpenCLProgram(device, + "split_bundle" , + "kernel_split_bundle.cl", + device->get_build_options(requested_features, "split_bundle", is_preview)); + + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update); + programs.push_back(&program_split); + +#undef ADD_SPLIT_KERNEL_PROGRAM +#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM + } +} + namespace { /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to @@ -319,7 +396,9 @@ public: OpenCLDevice::OpenCLProgram(device, program_name, device->get_opencl_program_filename(kernel_name), - device->get_build_options(requested_features, program_name)); + device->get_build_options(requested_features, + program_name, + device->use_preview_kernels)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); kernel->program.load(); @@ -339,7 +418,8 @@ public: size_buffer.zero_to_device(); uint threads = 
num_threads; - cl_kernel kernel_state_buffer_size = device->program_split(ustring("path_trace_state_buffer_size")); + OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs(); + cl_kernel kernel_state_buffer_size = programs->program_split(ustring("path_trace_state_buffer_size")); device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer); size_t global_size = 64; @@ -389,7 +469,8 @@ public: cl_int start_sample = rtile.start_sample; cl_int end_sample = rtile.start_sample + rtile.num_samples; - cl_kernel kernel_data_init = device->program_split(ustring("path_trace_data_init")); + OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs(); + cl_kernel kernel_data_init = programs->program_split(ustring("path_trace_data_init")); cl_uint start_arg_index = device->kernel_set_args(kernel_data_init, @@ -522,6 +603,8 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char* where) OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background) : Device(info, stats, profiler, background), + kernel_programs(this), + preview_programs(this), memory_manager(this), texture_info(this, "__texture_info", MEM_TEXTURE) { @@ -532,6 +615,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b null_mem = 0; device_initialized = false; textures_need_update = true; + use_preview_kernels = !background; vector<OpenCLPlatformDevice> usable_devices; OpenCLInfo::get_usable_devices(&usable_devices); @@ -595,11 +679,16 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b device_initialized = true; split_kernel = new OpenCLSplitKernel(this); + if (!background) { + load_preview_kernels(); + } } OpenCLDevice::~OpenCLDevice() { task_pool.stop(); + load_required_kernel_task_pool.stop(); + load_kernel_task_pool.stop(); memory_manager.free(); @@ -615,7 +704,7 @@ OpenCLDevice::~OpenCLDevice() bake_program.release(); displace_program.release(); 
background_program.release(); - program_split.release(); + denoising_program.release(); if(cqCommandQueue) clReleaseCommandQueue(cqCommandQueue); @@ -681,8 +770,51 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature /* Verify we have right opencl version. */ if(!opencl_version_check()) return false; + + load_required_kernels(requested_features); + + vector<OpenCLProgram*> programs; + kernel_programs.load_kernels(programs, requested_features, false); + + if (!requested_features.use_baking && requested_features.use_denoising) { + denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising")); + denoising_program.add_kernel(ustring("filter_divide_shadow")); + denoising_program.add_kernel(ustring("filter_get_feature")); + denoising_program.add_kernel(ustring("filter_write_feature")); + denoising_program.add_kernel(ustring("filter_detect_outliers")); + denoising_program.add_kernel(ustring("filter_combine_halves")); + denoising_program.add_kernel(ustring("filter_construct_transform")); + denoising_program.add_kernel(ustring("filter_nlm_calc_difference")); + denoising_program.add_kernel(ustring("filter_nlm_blur")); + denoising_program.add_kernel(ustring("filter_nlm_calc_weight")); + denoising_program.add_kernel(ustring("filter_nlm_update_output")); + denoising_program.add_kernel(ustring("filter_nlm_normalize")); + denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); + denoising_program.add_kernel(ustring("filter_finalize")); + programs.push_back(&denoising_program); + } + + load_required_kernel_task_pool.wait_work(); + + /* Parallel compilation of Cycles kernels, this launches multiple + * processes to workaround OpenCL frameworks serializing the calls + * internally within a single process. 
*/ + foreach(OpenCLProgram *program, programs) { + if (!program->load()) { + load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); + } + } + return true; +} +void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures& requested_features) +{ vector<OpenCLProgram*> programs; + base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base")); + base_program.add_kernel(ustring("convert_to_byte")); + base_program.add_kernel(ustring("convert_to_half_float")); + base_program.add_kernel(ustring("zero_buffer")); + programs.push_back(&base_program); if (requested_features.use_true_displacement) { displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace")); @@ -696,101 +828,89 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature programs.push_back(&background_program); } -#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); -#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \ - const string program_name_##kernel_name = "split_"#kernel_name; \ - program_##kernel_name = \ - OpenCLDevice::OpenCLProgram(this, \ - program_name_##kernel_name, \ - "kernel_"#kernel_name".cl", \ - get_build_options(requested_features, program_name_##kernel_name)); \ - program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ - programs.push_back(&program_##kernel_name); - - /* Ordered with most complex kernels first, to reduce overall compile time. 
*/ - ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter); - if (requested_features.use_volume) { - ADD_SPLIT_KERNEL_PROGRAM(do_volume); - } - ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl); - ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao); - ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); - ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); - ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); - ADD_SPLIT_KERNEL_PROGRAM(indirect_background); - ADD_SPLIT_KERNEL_PROGRAM(shader_eval); - - /* Quick kernels bundled in a single program to reduce overhead of starting - * Blender processes. */ - program_split = OpenCLDevice::OpenCLProgram(this, - "split_bundle" , - "kernel_split_bundle.cl", - get_build_options(requested_features, "split_bundle")); - - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface); - ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update); - programs.push_back(&program_split); - -#undef ADD_SPLIT_KERNEL_PROGRAM -#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM - - base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base")); - base_program.add_kernel(ustring("convert_to_byte")); - base_program.add_kernel(ustring("convert_to_half_float")); - base_program.add_kernel(ustring("zero_buffer")); - programs.push_back(&base_program); - if (requested_features.use_baking) { bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake")); bake_program.add_kernel(ustring("bake")); programs.push_back(&bake_program); } - denoising_program = 
OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising")); - denoising_program.add_kernel(ustring("filter_divide_shadow")); - denoising_program.add_kernel(ustring("filter_get_feature")); - denoising_program.add_kernel(ustring("filter_write_feature")); - denoising_program.add_kernel(ustring("filter_detect_outliers")); - denoising_program.add_kernel(ustring("filter_combine_halves")); - denoising_program.add_kernel(ustring("filter_construct_transform")); - denoising_program.add_kernel(ustring("filter_nlm_calc_difference")); - denoising_program.add_kernel(ustring("filter_nlm_blur")); - denoising_program.add_kernel(ustring("filter_nlm_calc_weight")); - denoising_program.add_kernel(ustring("filter_nlm_update_output")); - denoising_program.add_kernel(ustring("filter_nlm_normalize")); - denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); - denoising_program.add_kernel(ustring("filter_finalize")); - programs.push_back(&denoising_program); - - /* Parallel compilation of Cycles kernels, this launches multiple - * processes to workaround OpenCL frameworks serializing the calls - * internally within a single process. 
*/ - TaskPool task_pool; foreach(OpenCLProgram *program, programs) { - task_pool.push(function_bind(&OpenCLProgram::load, program)); + if (!program->load()) { + load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); + } } - task_pool.wait_work(); +} + +void OpenCLDevice::load_preview_kernels() +{ + DeviceRequestedFeatures no_features; + vector<OpenCLProgram*> programs; + preview_programs.load_kernels(programs, no_features, true); foreach(OpenCLProgram *program, programs) { - VLOG(2) << program->get_log(); - if(!program->is_loaded()) { - program->report_error(); - return false; + if (!program->load()) { + load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); } } +} +bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures& requested_features) +{ + if (background) { + load_kernel_task_pool.wait_work(); + use_preview_kernels = false; + } + else { + /* We use a device setting to determine to load preview kernels or not + * Better to check on device level than per kernel as mixing preview and + * non-preview kernels does not work due to different data types */ + if (use_preview_kernels) { + use_preview_kernels = !load_kernel_task_pool.finished(); + } + } return split_kernel->load_kernels(requested_features); } +OpenCLDevice::OpenCLSplitPrograms* OpenCLDevice::get_split_programs() +{ + return use_preview_kernels?&preview_programs:&kernel_programs; +} + +DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state() +{ + /* Do not switch kernels for background renderings + * We do foreground rendering but use the preview kernels + * Check for the optimized kernels + * + * This works also the other way around, where we are using + * optimized kernels but new ones are being compiled due + * to other features that are needed */ + if (background) { + /* The if-statements below would find the same result, + * But as the `finished` method uses a mutex we added + * this as an early exit */ + return 
DEVICE_KERNEL_USING_FEATURE_KERNEL; + } + + bool other_kernels_finished = load_kernel_task_pool.finished(); + if (use_preview_kernels) { + if (other_kernels_finished) { + return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE; + } + else { + return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL; + } + } + else { + if (other_kernels_finished) { + return DEVICE_KERNEL_USING_FEATURE_KERNEL; + } + else { + return DEVICE_KERNEL_FEATURE_KERNEL_INVALID; + } + } +} + void OpenCLDevice::mem_alloc(device_memory& mem) { if(mem.name) { @@ -892,6 +1012,7 @@ void OpenCLDevice::mem_copy_from(device_memory& mem, int y, int w, int h, int el void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size) { + base_program.wait_for_availability(); cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer")); size_t global_size[] = {1024, 1024}; @@ -1719,17 +1840,15 @@ void OpenCLDevice::shader(DeviceTask& task) cl_int d_shader_w = task.shader_w; cl_int d_offset = task.offset; - cl_kernel kernel; - + OpenCLDevice::OpenCLProgram *program = &background_program; if(task.shader_eval_type >= SHADER_EVAL_BAKE) { - kernel = bake_program(ustring("bake")); + program = &bake_program; } else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) { - kernel = displace_program(ustring("displace")); - } - else { - kernel = background_program(ustring("background")); + program = &displace_program; } + program->wait_for_availability(); + cl_kernel kernel = (*program)(); cl_uint start_arg_index = kernel_set_args(kernel, diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index ef0deaeff62..920c8dc4e6a 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -243,6 +243,18 @@ string OpenCLCache::get_kernel_md5() return self.kernel_md5; } +static string get_program_source(const string& kernel_file) +{ + string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n"; + /* We compile kernels consisting of many 
files. unfortunately OpenCL + * kernel caches do not seem to recognize changes in included files. + * so we force recompile on changes by adding the md5 hash of all files. + */ + source = path_source_replace_includes(source, path_get("source")); + source += "\n// " + util_md5_string(source) + "\n"; + return source; +} + OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device, const string& program_name, const string& kernel_file, @@ -255,6 +267,7 @@ OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device, use_stdout(use_stdout) { loaded = false; + needs_compiling = true; program = NULL; } @@ -343,13 +356,7 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src) bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src) { - string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n"; - /* We compile kernels consisting of many files. unfortunately OpenCL - * kernel caches do not seem to recognize changes in included files. - * so we force recompile on changes by adding the md5 hash of all files. 
- */ - source = path_source_replace_includes(source, path_get("source")); - source += "\n// " + util_md5_string(source) + "\n"; + string source = get_program_source(kernel_file); if(debug_src) { path_write_text(*debug_src, source); @@ -473,8 +480,7 @@ bool device_opencl_compile_kernel(const vector<string>& parameters) return false; } - string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n"; - source = path_source_replace_includes(source, path_get("source")); + string source = get_program_source(kernel_file); size_t source_len = source.size(); const char *source_str = source.c_str(); cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err); @@ -548,11 +554,54 @@ bool OpenCLDevice::OpenCLProgram::save_binary(const string& clbin) return path_write_binary(clbin, binary); } -void OpenCLDevice::OpenCLProgram::load() +bool OpenCLDevice::OpenCLProgram::load() { - assert(device); - loaded = false; + string device_md5 = device->device_md5_hash(kernel_build_options); + + /* Try to use cached kernel. */ + thread_scoped_lock cache_locker; + ustring cache_key(program_name + device_md5); + program = device->load_cached_kernel(cache_key, + cache_locker); + if (!program) { + add_log(string("OpenCL program ") + program_name + " not found in cache.", true); + + /* need to create source to get md5 */ + string source = get_program_source(kernel_file); + + string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source); + basename = path_cache_get(path_join("kernels", basename)); + string clbin = basename + ".clbin"; + + /* If binary kernel exists already, try use it. */ + if(path_exists(clbin) && load_binary(clbin)) { + /* Kernel loaded from binary, nothing to do. */ + add_log(string("Loaded program from ") + clbin + ".", true); + + /* Cache the program. 
*/ + device->store_cached_kernel(program, + cache_key, + cache_locker); + } + else { + add_log(string("OpenCL program ") + program_name + " not found on disk.", true); + cache_locker.unlock(); + } + } + + if (program) { + create_kernels(); + loaded = true; + needs_compiling = false; + } + + return loaded; +} + +void OpenCLDevice::OpenCLProgram::compile() +{ + assert(device); string device_md5 = device->device_md5_hash(kernel_build_options); @@ -562,12 +611,13 @@ void OpenCLDevice::OpenCLProgram::load() program = device->load_cached_kernel(cache_key, cache_locker); - if(!program) { + if (!program) + { + add_log(string("OpenCL program ") + program_name + " not found in cache.", true); /* need to create source to get md5 */ - string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n"; - source = path_source_replace_includes(source, path_get("source")); + string source = get_program_source(kernel_file); string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source); basename = path_cache_get(path_join("kernels", basename)); @@ -582,49 +632,38 @@ void OpenCLDevice::OpenCLProgram::load() } /* If binary kernel exists already, try use it. */ - if(path_exists(clbin) && load_binary(clbin)) { - /* Kernel loaded from binary, nothing to do. */ - add_log(string("Loaded program from ") + clbin + ".", true); + if(compile_separate(clbin)) { + add_log(string("Built and loaded program from ") + clbin + ".", true); + loaded = true; } else { - add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true); - if(!path_exists(clbin)) { - if(compile_separate(clbin)) { - add_log(string("Built and loaded program from ") + clbin + ".", true); - loaded = true; - } - else { - add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true); - - /* If does not exist or loading binary failed, compile kernel. 
*/ - if(!compile_kernel(debug_src)) { - return; - } - - /* Save binary for reuse. */ - if(!save_binary(clbin)) { - add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); - } - } + add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true); + + /* If does not exist or loading binary failed, compile kernel. */ + if(!compile_kernel(debug_src)) { + needs_compiling = false; + return; } - else { - add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true); - /* Fall back to compiling. */ - if(!compile_kernel(debug_src)) { - return; - } + + /* Save binary for reuse. */ + if(!save_binary(clbin)) { + add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true); } } /* Cache the program. */ device->store_cached_kernel(program, - cache_key, - cache_locker); - } - else { - add_log(string("Found cached OpenCL program ") + program_name + ".", true); + cache_key, + cache_locker); } + create_kernels(); + needs_compiling = false; + loaded = true; +} + +void OpenCLDevice::OpenCLProgram::create_kernels() +{ for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) { assert(kernel->second == NULL); cl_int ciErr; @@ -635,8 +674,15 @@ void OpenCLDevice::OpenCLProgram::load() return; } } +} - loaded = true; +bool OpenCLDevice::OpenCLProgram::wait_for_availability() +{ + add_log(string("Waiting for availability of ") + program_name + ".", true); + while (needs_compiling) { + time_sleep(0.1); + } + return loaded; } void OpenCLDevice::OpenCLProgram::report_error() diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index caa0057d997..281d9a25047 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -121,52 +121,62 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_OPENCL__ -/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! 
*/ +# if defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__) +# define __CL_USE_NATIVE__ +# endif -# ifdef __KERNEL_OPENCL_NVIDIA__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __SUBSURFACE__ -# define __PRINCIPLED__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __CMJ__ -# define __BRANCHED_PATH__ -# endif /* __KERNEL_OPENCL_NVIDIA__ */ +/* Preview kernel is used as a small kernel when the optimized kernel is still being compiled. */ +# ifdef __KERNEL_OPENCL_PREVIEW__ +# define __AO__ +# define __PASSES__ +# define __HAIR__ +# else + +/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */ -# ifdef __KERNEL_OPENCL_APPLE__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __PRINCIPLED__ -# define __CMJ__ +# ifdef __KERNEL_OPENCL_NVIDIA__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# define __SUBSURFACE__ +# define __PRINCIPLED__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ +# define __SHADOW_RECORD_ALL__ +# define __CMJ__ +# define __BRANCHED_PATH__ +# endif /* __KERNEL_OPENCL_NVIDIA__ */ + +# ifdef __KERNEL_OPENCL_APPLE__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# define __PRINCIPLED__ +# define __CMJ__ /* TODO(sergey): Currently experimental section is ignored here, * this is because megakernel in device_opencl does not support * custom cflags depending on the scene features. 
*/ -# endif /* __KERNEL_OPENCL_APPLE__ */ - -# ifdef __KERNEL_OPENCL_AMD__ -# define __CL_USE_NATIVE__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __SUBSURFACE__ -# define __PRINCIPLED__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __CMJ__ -# define __BRANCHED_PATH__ -# endif /* __KERNEL_OPENCL_AMD__ */ - -# ifdef __KERNEL_OPENCL_INTEL_CPU__ -# define __CL_USE_NATIVE__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __PRINCIPLED__ -# define __CMJ__ -# endif /* __KERNEL_OPENCL_INTEL_CPU__ */ - +# endif /* __KERNEL_OPENCL_APPLE__ */ + +# ifdef __KERNEL_OPENCL_AMD__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# define __SUBSURFACE__ +# define __PRINCIPLED__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ +# define __SHADOW_RECORD_ALL__ +# define __CMJ__ +# define __BRANCHED_PATH__ +# endif /* __KERNEL_OPENCL_AMD__ */ + +# ifdef __KERNEL_OPENCL_INTEL_CPU__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# define __PRINCIPLED__ +# define __CMJ__ +# endif /* __KERNEL_OPENCL_INTEL_CPU__ */ + +# endif /* __KERNEL_OPENCL_PREVIEW__ */ #endif /* __KERNEL_OPENCL__ */ /* Kernel features */ diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index dea50d52cfa..d4b1a5e843b 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -212,6 +212,11 @@ void Session::run_gpu() /* advance to next tile */ bool no_tiles = !tile_manager.next(); + DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN; + if (no_tiles) { + kernel_state = device->get_active_kernel_switch_state(); + } + if(params.background) { /* if no work left and in background mode, we can stop immediately */ if(no_tiles) { @@ -219,6 +224,16 @@ void Session::run_gpu() break; } } + + /* Don't go in pause mode when image was rendered with preview kernels + * When feature kernels become available the session will be reset. 
*/ + else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { + time_sleep(0.1); + } + else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { + reset_gpu(tile_manager.params, params.samples); + } + else { /* if in interactive mode, and we are either paused or done for now, * wait for pause condition notify to wake up again */ @@ -540,6 +555,11 @@ void Session::run_cpu() bool no_tiles = !tile_manager.next(); bool need_tonemap = false; + DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN; + if (no_tiles) { + kernel_state = device->get_active_kernel_switch_state(); + } + if(params.background) { /* if no work left and in background mode, we can stop immediately */ if(no_tiles) { @@ -547,6 +567,16 @@ void Session::run_cpu() break; } } + + /* Don't go in pause mode when preview kernels are used + * When feature kernels become available the session will be reset. */ + else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { + time_sleep(0.1); + } + else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { + reset_cpu(tile_manager.params, params.samples); + } + else { /* if in interactive mode, and we are either paused or done for now, * wait for pause condition notify to wake up again */ @@ -699,7 +729,7 @@ DeviceRequestedFeatures Session::get_requested_device_features() return requested_features; } -void Session::load_kernels(bool lock_scene) +bool Session::load_kernels(bool lock_scene) { thread_scoped_lock scene_lock; if(lock_scene) { @@ -722,7 +752,7 @@ void Session::load_kernels(bool lock_scene) progress.set_error(message); progress.set_status("Error", message); progress.set_update(); - return; + return false; } progress.add_skip_time(timer, false); @@ -730,14 +760,13 @@ void Session::load_kernels(bool lock_scene) kernels_loaded = true; loaded_kernel_features = requested_features; + return true; } + return false; } void Session::run() { - /* load kernels */ - 
load_kernels(); - if(params.use_profiling && (params.device.type == DEVICE_CPU)) { profiler.start(); } @@ -879,7 +908,7 @@ bool Session::update_scene() /* update scene */ if(scene->need_update()) { - load_kernels(false); + bool new_kernels_needed = load_kernels(false); /* Update max_closures. */ KernelIntegrator *kintegrator = &scene->dscene.data.integrator; @@ -894,6 +923,21 @@ bool Session::update_scene() progress.set_status("Updating Scene"); MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); + DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state(); + bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE || + kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID; + if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { + progress.set_kernel_status("Compiling render kernels"); + } + if (new_kernels_needed || kernel_switch_needed) { + progress.set_kernel_status("Compiling render kernels"); + device->wait_for_availability(loaded_kernel_features); + progress.set_kernel_status(""); + } + + if (kernel_switch_needed) { + reset(tile_manager.params, params.samples); + } return true; } return false; diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index cbdfc75a905..404b7b7a945 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -162,7 +162,7 @@ public: void set_pause(bool pause); bool update_scene(); - void load_kernels(bool lock_scene=true); + bool load_kernels(bool lock_scene=true); void device_free(); diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 4ed9ebd60ff..06900d14cdc 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -46,6 +46,7 @@ public: substatus = ""; sync_status = ""; sync_substatus = ""; + kernel_status = ""; update_cb = function_null; cancel = false; cancel_message = ""; @@ -86,6 +87,7 @@ public: substatus = 
""; sync_status = ""; sync_substatus = ""; + kernel_status = ""; cancel = false; cancel_message = ""; error = false; @@ -313,6 +315,25 @@ public: } } + + /* kernel status */ + + void set_kernel_status(const string &kernel_status_) + { + { + thread_scoped_lock lock(progress_mutex); + kernel_status = kernel_status_; + } + + set_update(); + } + + void get_kernel_status(string &kernel_status_) + { + thread_scoped_lock lock(progress_mutex); + kernel_status_ = kernel_status; + } + /* callback */ void set_update() @@ -356,6 +377,8 @@ protected: string sync_status; string sync_substatus; + string kernel_status; + volatile bool cancel; string cancel_message; diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 2a705c2432b..ce166af206a 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -148,6 +148,12 @@ bool TaskPool::canceled() return do_cancel; } +bool TaskPool::finished() +{ + thread_scoped_lock num_lock(num_mutex); + return num == 0; +} + void TaskPool::num_decrease(int done) { num_mutex.lock(); diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h index 15f0d341be7..a7e19d1ab75 100644 --- a/intern/cycles/util/util_task.h +++ b/intern/cycles/util/util_task.h @@ -93,6 +93,7 @@ public: void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ void cancel(); /* cancel all tasks, keep worker threads running */ void stop(); /* stop all worker threads */ + bool finished(); /* check if all work has been completed */ bool canceled(); /* for worker threads, test if canceled */ |