Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorJeroen Bakker <j.bakker@atmind.nl>2019-03-15 18:28:33 +0300
committerJeroen Bakker <j.bakker@atmind.nl>2019-03-15 18:28:33 +0300
commit5051e580e4028e0a741e6519e469321a7e1f2a71 (patch)
tree83d1195eb21106155c0c949ae1453a9ce8665333 /intern
parent86ea76254d3f625bf3d9e15dade5d5b3544c06a3 (diff)
parent2f6257fd7fe305e3b226a8b505eb614bbeaf762a (diff)
Merge branch 'blender2.7'
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/blender_session.cpp10
-rw-r--r--intern/cycles/blender/blender_session.h1
-rw-r--r--intern/cycles/device/device.h22
-rw-r--r--intern/cycles/device/device_multi.cpp31
-rw-r--r--intern/cycles/device/opencl/opencl.h76
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp321
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp144
-rw-r--r--intern/cycles/kernel/kernel_types.h90
-rw-r--r--intern/cycles/render/session.cpp56
-rw-r--r--intern/cycles/render/session.h2
-rw-r--r--intern/cycles/util/util_progress.h23
-rw-r--r--intern/cycles/util/util_task.cpp6
-rw-r--r--intern/cycles/util/util_task.h1
13 files changed, 575 insertions, 208 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 501e4fec13f..f1cdda5cb13 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -987,6 +987,11 @@ void BlenderSession::get_status(string& status, string& substatus)
session->progress.get_status(status, substatus);
}
+void BlenderSession::get_kernel_status(string& kernel_status)
+{
+ session->progress.get_kernel_status(kernel_status);
+}
+
void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
{
session->progress.get_time(total_time, render_time);
@@ -1005,7 +1010,7 @@ void BlenderSession::update_bake_progress()
void BlenderSession::update_status_progress()
{
- string timestatus, status, substatus;
+ string timestatus, status, substatus, kernel_status;
string scene_status = "";
float progress;
double total_time, remaining_time = 0, render_time;
@@ -1014,6 +1019,7 @@ void BlenderSession::update_status_progress()
float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
get_status(status, substatus);
+ get_kernel_status(kernel_status);
get_progress(progress, total_time, render_time);
if(progress > 0)
@@ -1038,6 +1044,8 @@ void BlenderSession::update_status_progress()
status = " | " + status;
if(substatus.size() > 0)
status += " | " + substatus;
+ if(kernel_status.size() > 0)
+ status += " | " + kernel_status;
}
double current_time = time_dt();
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 1915cdb36f1..2bfb9e56c37 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -90,6 +90,7 @@ public:
void tag_redraw();
void tag_update();
void get_status(string& status, string& substatus);
+ void get_kernel_status(string& kernel_status);
void get_progress(float& progress, double& total_time, double& render_time);
void test_cancel();
void update_status_progress();
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 08b0e7435fe..4db8d10a4aa 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -56,6 +56,14 @@ enum DeviceTypeMask {
DEVICE_MASK_ALL = ~0
};
+enum DeviceKernelStatus {
+ DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
+ DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
+ DEVICE_KERNEL_USING_FEATURE_KERNEL,
+ DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
+ DEVICE_KERNEL_UNKNOWN,
+};
+
#define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
class DeviceInfo {
@@ -334,6 +342,20 @@ public:
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
+ /* Wait for the device to become available to upload data and receive tasks.
+ * This method is used by the OpenCL device to load the
+ * optimized kernels or, when these are not (yet) available, to load the
+ * generic kernels (only during foreground rendering). */
+ virtual bool wait_for_availability(
+ const DeviceRequestedFeatures& /*requested_features*/)
+ { return true; }
+ /* Check if there are 'better' kernels available to be used.
+ * If so, we can switch over to these kernels.
+ * This method is used to determine if we can switch the preview kernels
+ * to regular kernels. */
+ virtual DeviceKernelStatus get_active_kernel_switch_state()
+ { return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
+
/* tasks */
virtual int get_split_task_count(DeviceTask& task) = 0;
virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 3308af4f53f..efb4d9dd288 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -120,6 +120,37 @@ public:
return true;
}
+ bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
+ {
+ foreach(SubDevice& sub, devices)
+ if(!sub.device->wait_for_availability(requested_features))
+ return false;
+
+ return true;
+ }
+
+ DeviceKernelStatus get_active_kernel_switch_state()
+ {
+ DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
+
+ foreach(SubDevice& sub, devices) {
+ DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
+ switch (subresult) {
+ case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
+ result = subresult;
+ break;
+
+ case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
+ case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
+ return subresult;
+
+ case DEVICE_KERNEL_USING_FEATURE_KERNEL:
+ break;
+ }
+ }
+ return result;
+ }
+
void mem_alloc(device_memory& mem)
{
device_ptr key = unique_key++;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 2a4e07419ac..bb507be4c72 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -261,16 +261,22 @@ class OpenCLDevice : public Device
{
public:
DedicatedTaskPool task_pool;
+
+ /* Task pool for required kernels (base, AO kernels during foreground rendering) */
+ TaskPool load_required_kernel_task_pool;
+ /* Task pool for optional kernels (feature kernels during foreground rendering) */
+ TaskPool load_kernel_task_pool;
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_int ciErr;
int device_num;
+ bool use_preview_kernels;
class OpenCLProgram {
public:
- OpenCLProgram() : loaded(false), program(NULL), device(NULL) {}
+ OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
OpenCLProgram(OpenCLDevice *device,
const string& program_name,
const string& kernel_name,
@@ -279,12 +285,24 @@ public:
~OpenCLProgram();
void add_kernel(ustring name);
- void load();
+
+ /* Try to load the program from device cache or disk */
+ bool load();
+ /* Compile the kernel (first separate, fall back to local) */
+ void compile();
+ /* Create the OpenCL kernels after loading or compiling */
+ void create_kernels();
bool is_loaded() const { return loaded; }
const string& get_log() const { return log; }
void report_error();
+ /* Wait until this kernel is available to be used
+ * It will return true when the kernel is available.
+ * It will return false when the kernel is not available
+ * or could not be loaded. */
+ bool wait_for_availability();
+
cl_kernel operator()();
cl_kernel operator()(ustring name);
@@ -308,6 +326,8 @@ public:
void add_error(const string& msg);
bool loaded;
+ bool needs_compiling;
+
cl_program program;
OpenCLDevice *device;
@@ -323,19 +343,32 @@ public:
map<ustring, cl_kernel> kernels;
};
- DeviceSplitKernel *split_kernel;
-
- OpenCLProgram program_split;
+ /* Container for all types of split programs. */
+ class OpenCLSplitPrograms {
+ public:
+ OpenCLDevice *device;
+ OpenCLProgram program_split;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+
+ OpenCLSplitPrograms(OpenCLDevice *device);
+ ~OpenCLSplitPrograms();
+
+ /* Load the kernels and put the created kernels in the given `programs`
+ * parameter. */
+ void load_kernels(vector<OpenCLProgram*> &programs,
+ const DeviceRequestedFeatures& requested_features,
+ bool is_preview=false);
+ };
- OpenCLProgram program_lamp_emission;
- OpenCLProgram program_do_volume;
- OpenCLProgram program_indirect_background;
- OpenCLProgram program_shader_eval;
- OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
- OpenCLProgram program_subsurface_scatter;
- OpenCLProgram program_direct_lighting;
- OpenCLProgram program_shadow_blocked_ao;
- OpenCLProgram program_shadow_blocked_dl;
+ DeviceSplitKernel *split_kernel;
OpenCLProgram base_program;
OpenCLProgram bake_program;
@@ -343,6 +376,9 @@ public:
OpenCLProgram background_program;
OpenCLProgram denoising_program;
+ OpenCLSplitPrograms kernel_programs;
+ OpenCLSplitPrograms preview_programs;
+
typedef map<string, device_vector<uchar>*> ConstMemMap;
typedef map<string, device_ptr> MemMap;
@@ -358,22 +394,30 @@ public:
void opencl_error(const string& message);
void opencl_assert_err(cl_int err, const char* where);
- OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
+ OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
~OpenCLDevice();
static void CL_CALLBACK context_notify_callback(const char *err_info,
const void * /*private_info*/, size_t /*cb*/, void *user_data);
bool opencl_version_check();
+ OpenCLSplitPrograms* get_split_programs();
string device_md5_hash(string kernel_custom_build_options = "");
bool load_kernels(const DeviceRequestedFeatures& requested_features);
+ void load_required_kernels(const DeviceRequestedFeatures& requested_features);
+ void load_preview_kernels();
+
+ bool wait_for_availability(const DeviceRequestedFeatures& requested_features);
+ DeviceKernelStatus get_active_kernel_switch_state();
/* Get the name of the opencl program for the given kernel */
const string get_opencl_program_name(const string& kernel_name);
/* Get the program file name to compile (*.cl) for the given kernel */
const string get_opencl_program_filename(const string& kernel_name);
- string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name);
+ string get_build_options(const DeviceRequestedFeatures& requested_features,
+ const string& opencl_program_name,
+ bool preview_kernel=false);
/* Enable the default features to reduce recompilation events */
void enable_default_features(DeviceRequestedFeatures& features);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 422813c2e07..555707cecd5 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -40,7 +40,13 @@ struct texture_slot_t {
int slot;
};
-static const string fast_compiled_kernels =
+static const string NON_SPLIT_KERNELS =
+ "denoising "
+ "base "
+ "background "
+ "displace ";
+
+static const string SPLIT_BUNDLE_KERNELS =
"data_init "
"path_init "
"state_buffer_size "
@@ -55,7 +61,10 @@ static const string fast_compiled_kernels =
const string OpenCLDevice::get_opencl_program_name(const string& kernel_name)
{
- if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ if (NON_SPLIT_KERNELS.find(kernel_name) != std::string::npos) {
+ return kernel_name;
+ }
+ else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
return "split_bundle";
}
else {
@@ -65,7 +74,10 @@ const string OpenCLDevice::get_opencl_program_name(const string& kernel_name)
const string OpenCLDevice::get_opencl_program_filename(const string& kernel_name)
{
- if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ if (kernel_name == "denoising") {
+ return "filter.cl";
+ }
+ else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
return "kernel_split_bundle.cl";
}
else {
@@ -92,7 +104,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features)
}
}
-string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name)
+string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name, bool preview_kernel)
{
/* first check for non-split kernel programs */
if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@@ -169,7 +181,13 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
enable_default_features(nofeatures);
/* Add program specific optimized compile directives */
- if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
+ if (preview_kernel) {
+ DeviceRequestedFeatures preview_features;
+ preview_features.use_hair = true;
+ build_options += "-D__KERNEL_OPENCL_PREVIEW__ ";
+ build_options += preview_features.get_build_options();
+ }
+ else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
build_options += nofeatures.get_build_options();
}
else {
@@ -196,6 +214,77 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
return build_options;
}
+OpenCLDevice::OpenCLSplitPrograms::OpenCLSplitPrograms(OpenCLDevice *device_)
+{
+ device = device_;
+}
+
+OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
+{
+ program_split.release();
+ program_lamp_emission.release();
+ program_do_volume.release();
+ program_indirect_background.release();
+ program_shader_eval.release();
+ program_holdout_emission_blurring_pathtermination_ao.release();
+ program_subsurface_scatter.release();
+ program_direct_lighting.release();
+ program_shadow_blocked_ao.release();
+ program_shadow_blocked_dl.release();
+}
+
+void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &programs, const DeviceRequestedFeatures& requested_features, bool is_preview)
+{
+ if (!requested_features.use_baking) {
+#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
+ const string program_name_##kernel_name = "split_"#kernel_name; \
+ program_##kernel_name = \
+ OpenCLDevice::OpenCLProgram(device, \
+ program_name_##kernel_name, \
+ "kernel_"#kernel_name".cl", \
+ device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
+ program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+ programs.push_back(&program_##kernel_name);
+
+ /* Ordered with most complex kernels first, to reduce overall compile time. */
+ ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+ if (requested_features.use_volume || is_preview) {
+ ADD_SPLIT_KERNEL_PROGRAM(do_volume);
+ }
+ ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+
+ /* Quick kernels bundled in a single program to reduce overhead of starting
+ * Blender processes. */
+ program_split = OpenCLDevice::OpenCLProgram(device,
+ "split_bundle" ,
+ "kernel_split_bundle.cl",
+ device->get_build_options(requested_features, "split_bundle", is_preview));
+
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+ programs.push_back(&program_split);
+
+#undef ADD_SPLIT_KERNEL_PROGRAM
+#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
+ }
+}
+
namespace {
/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
@@ -307,7 +396,9 @@ public:
OpenCLDevice::OpenCLProgram(device,
program_name,
device->get_opencl_program_filename(kernel_name),
- device->get_build_options(requested_features, program_name));
+ device->get_build_options(requested_features,
+ program_name,
+ device->use_preview_kernels));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
kernel->program.load();
@@ -327,7 +418,8 @@ public:
size_buffer.zero_to_device();
uint threads = num_threads;
- cl_kernel kernel_state_buffer_size = device->program_split(ustring("path_trace_state_buffer_size"));
+ OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+ cl_kernel kernel_state_buffer_size = programs->program_split(ustring("path_trace_state_buffer_size"));
device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer);
size_t global_size = 64;
@@ -377,7 +469,8 @@ public:
cl_int start_sample = rtile.start_sample;
cl_int end_sample = rtile.start_sample + rtile.num_samples;
- cl_kernel kernel_data_init = device->program_split(ustring("path_trace_data_init"));
+ OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+ cl_kernel kernel_data_init = programs->program_split(ustring("path_trace_data_init"));
cl_uint start_arg_index =
device->kernel_set_args(kernel_data_init,
@@ -510,6 +603,8 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char* where)
OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
: Device(info, stats, profiler, background),
+ kernel_programs(this),
+ preview_programs(this),
memory_manager(this),
texture_info(this, "__texture_info", MEM_TEXTURE)
{
@@ -520,6 +615,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
null_mem = 0;
device_initialized = false;
textures_need_update = true;
+ use_preview_kernels = !background;
vector<OpenCLPlatformDevice> usable_devices;
OpenCLInfo::get_usable_devices(&usable_devices);
@@ -583,11 +679,16 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
device_initialized = true;
split_kernel = new OpenCLSplitKernel(this);
+ if (!background) {
+ load_preview_kernels();
+ }
}
OpenCLDevice::~OpenCLDevice()
{
task_pool.stop();
+ load_required_kernel_task_pool.stop();
+ load_kernel_task_pool.stop();
memory_manager.free();
@@ -603,7 +704,7 @@ OpenCLDevice::~OpenCLDevice()
bake_program.release();
displace_program.release();
background_program.release();
- program_split.release();
+ denoising_program.release();
if(cqCommandQueue)
clReleaseCommandQueue(cqCommandQueue);
@@ -669,8 +770,51 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
/* Verify we have right opencl version. */
if(!opencl_version_check())
return false;
+
+ load_required_kernels(requested_features);
+
+ vector<OpenCLProgram*> programs;
+ kernel_programs.load_kernels(programs, requested_features, false);
+
+ if (!requested_features.use_baking && requested_features.use_denoising) {
+ denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
+ denoising_program.add_kernel(ustring("filter_divide_shadow"));
+ denoising_program.add_kernel(ustring("filter_get_feature"));
+ denoising_program.add_kernel(ustring("filter_write_feature"));
+ denoising_program.add_kernel(ustring("filter_detect_outliers"));
+ denoising_program.add_kernel(ustring("filter_combine_halves"));
+ denoising_program.add_kernel(ustring("filter_construct_transform"));
+ denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
+ denoising_program.add_kernel(ustring("filter_nlm_blur"));
+ denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
+ denoising_program.add_kernel(ustring("filter_nlm_update_output"));
+ denoising_program.add_kernel(ustring("filter_nlm_normalize"));
+ denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
+ denoising_program.add_kernel(ustring("filter_finalize"));
+ programs.push_back(&denoising_program);
+ }
+
+ load_required_kernel_task_pool.wait_work();
+
+ /* Parallel compilation of Cycles kernels, this launches multiple
+ * processes to workaround OpenCL frameworks serializing the calls
+ * internally within a single process. */
+ foreach(OpenCLProgram *program, programs) {
+ if (!program->load()) {
+ load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ }
+ }
+ return true;
+}
+void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures& requested_features)
+{
vector<OpenCLProgram*> programs;
+ base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
+ base_program.add_kernel(ustring("convert_to_byte"));
+ base_program.add_kernel(ustring("convert_to_half_float"));
+ base_program.add_kernel(ustring("zero_buffer"));
+ programs.push_back(&base_program);
if (requested_features.use_true_displacement) {
displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace"));
@@ -684,101 +828,89 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
programs.push_back(&background_program);
}
-#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
-#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
- const string program_name_##kernel_name = "split_"#kernel_name; \
- program_##kernel_name = \
- OpenCLDevice::OpenCLProgram(this, \
- program_name_##kernel_name, \
- "kernel_"#kernel_name".cl", \
- get_build_options(requested_features, program_name_##kernel_name)); \
- program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
- programs.push_back(&program_##kernel_name);
-
- /* Ordered with most complex kernels first, to reduce overall compile time. */
- ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
- if (requested_features.use_volume) {
- ADD_SPLIT_KERNEL_PROGRAM(do_volume);
- }
- ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
- ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
- ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
- ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
- ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
- ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
- ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
-
- /* Quick kernels bundled in a single program to reduce overhead of starting
- * Blender processes. */
- program_split = OpenCLDevice::OpenCLProgram(this,
- "split_bundle" ,
- "kernel_split_bundle.cl",
- get_build_options(requested_features, "split_bundle"));
-
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
- ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
- programs.push_back(&program_split);
-
-#undef ADD_SPLIT_KERNEL_PROGRAM
-#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
-
- base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
- base_program.add_kernel(ustring("convert_to_byte"));
- base_program.add_kernel(ustring("convert_to_half_float"));
- base_program.add_kernel(ustring("zero_buffer"));
- programs.push_back(&base_program);
-
if (requested_features.use_baking) {
bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake"));
bake_program.add_kernel(ustring("bake"));
programs.push_back(&bake_program);
}
- denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
- denoising_program.add_kernel(ustring("filter_divide_shadow"));
- denoising_program.add_kernel(ustring("filter_get_feature"));
- denoising_program.add_kernel(ustring("filter_write_feature"));
- denoising_program.add_kernel(ustring("filter_detect_outliers"));
- denoising_program.add_kernel(ustring("filter_combine_halves"));
- denoising_program.add_kernel(ustring("filter_construct_transform"));
- denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
- denoising_program.add_kernel(ustring("filter_nlm_blur"));
- denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
- denoising_program.add_kernel(ustring("filter_nlm_update_output"));
- denoising_program.add_kernel(ustring("filter_nlm_normalize"));
- denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
- denoising_program.add_kernel(ustring("filter_finalize"));
- programs.push_back(&denoising_program);
-
- /* Parallel compilation of Cycles kernels, this launches multiple
- * processes to workaround OpenCL frameworks serializing the calls
- * internally within a single process. */
- TaskPool task_pool;
foreach(OpenCLProgram *program, programs) {
- task_pool.push(function_bind(&OpenCLProgram::load, program));
+ if (!program->load()) {
+ load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ }
}
- task_pool.wait_work();
+}
+
+void OpenCLDevice::load_preview_kernels()
+{
+ DeviceRequestedFeatures no_features;
+ vector<OpenCLProgram*> programs;
+ preview_programs.load_kernels(programs, no_features, true);
foreach(OpenCLProgram *program, programs) {
- VLOG(2) << program->get_log();
- if(!program->is_loaded()) {
- program->report_error();
- return false;
+ if (!program->load()) {
+ load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
}
}
+}
+bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures& requested_features)
+{
+ if (background) {
+ load_kernel_task_pool.wait_work();
+ use_preview_kernels = false;
+ }
+ else {
+ /* We use a device setting to determine whether to load preview kernels or not.
+ * It is better to check on device level than per kernel, as mixing preview and
+ * non-preview kernels does not work due to different data types. */
+ if (use_preview_kernels) {
+ use_preview_kernels = !load_kernel_task_pool.finished();
+ }
+ }
return split_kernel->load_kernels(requested_features);
}
+OpenCLDevice::OpenCLSplitPrograms* OpenCLDevice::get_split_programs()
+{
+ return use_preview_kernels?&preview_programs:&kernel_programs;
+}
+
+DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
+{
+ /* Do not switch kernels for background renderings.
+ * When we do foreground rendering but use the preview kernels,
+ * check for the optimized kernels.
+ *
+ * This also works the other way around, where we are using
+ * optimized kernels but new ones are being compiled due
+ * to other features that are needed. */
+ if (background) {
+ /* The if-statements below would find the same result,
+ * But as the `finished` method uses a mutex we added
+ * this as an early exit */
+ return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+ }
+
+ bool other_kernels_finished = load_kernel_task_pool.finished();
+ if (use_preview_kernels) {
+ if (other_kernels_finished) {
+ return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
+ }
+ else {
+ return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL;
+ }
+ }
+ else {
+ if (other_kernels_finished) {
+ return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+ }
+ else {
+ return DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+ }
+ }
+}
+
void OpenCLDevice::mem_alloc(device_memory& mem)
{
if(mem.name) {
@@ -880,6 +1012,7 @@ void OpenCLDevice::mem_copy_from(device_memory& mem, int y, int w, int h, int el
void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size)
{
+ base_program.wait_for_availability();
cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
size_t global_size[] = {1024, 1024};
@@ -1707,17 +1840,15 @@ void OpenCLDevice::shader(DeviceTask& task)
cl_int d_shader_w = task.shader_w;
cl_int d_offset = task.offset;
- cl_kernel kernel;
-
+ OpenCLDevice::OpenCLProgram *program = &background_program;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
- kernel = bake_program(ustring("bake"));
+ program = &bake_program;
}
else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
- kernel = displace_program(ustring("displace"));
- }
- else {
- kernel = background_program(ustring("background"));
+ program = &displace_program;
}
+ program->wait_for_availability();
+ cl_kernel kernel = (*program)();
cl_uint start_arg_index =
kernel_set_args(kernel,
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index ef0deaeff62..920c8dc4e6a 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -243,6 +243,18 @@ string OpenCLCache::get_kernel_md5()
return self.kernel_md5;
}
+static string get_program_source(const string& kernel_file)
+{
+ string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
+ /* We compile kernels consisting of many files. Unfortunately, OpenCL
+ * kernel caches do not seem to recognize changes in included files,
+ * so we force a recompile on changes by adding the md5 hash of all files.
+ */
+ source = path_source_replace_includes(source, path_get("source"));
+ source += "\n// " + util_md5_string(source) + "\n";
+ return source;
+}
+
OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
const string& program_name,
const string& kernel_file,
@@ -255,6 +267,7 @@ OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
use_stdout(use_stdout)
{
loaded = false;
+ needs_compiling = true;
program = NULL;
}
@@ -343,13 +356,7 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
{
- string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
- /* We compile kernels consisting of many files. unfortunately OpenCL
- * kernel caches do not seem to recognize changes in included files.
- * so we force recompile on changes by adding the md5 hash of all files.
- */
- source = path_source_replace_includes(source, path_get("source"));
- source += "\n// " + util_md5_string(source) + "\n";
+ string source = get_program_source(kernel_file);
if(debug_src) {
path_write_text(*debug_src, source);
@@ -473,8 +480,7 @@ bool device_opencl_compile_kernel(const vector<string>& parameters)
return false;
}
- string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
- source = path_source_replace_includes(source, path_get("source"));
+ string source = get_program_source(kernel_file);
size_t source_len = source.size();
const char *source_str = source.c_str();
cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
@@ -548,11 +554,54 @@ bool OpenCLDevice::OpenCLProgram::save_binary(const string& clbin)
return path_write_binary(clbin, binary);
}
-void OpenCLDevice::OpenCLProgram::load()
+bool OpenCLDevice::OpenCLProgram::load()
{
- assert(device);
-
loaded = false;
+ string device_md5 = device->device_md5_hash(kernel_build_options);
+
+ /* Try to use cached kernel. */
+ thread_scoped_lock cache_locker;
+ ustring cache_key(program_name + device_md5);
+ program = device->load_cached_kernel(cache_key,
+ cache_locker);
+ if (!program) {
+ add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
+
+ /* need to create source to get md5 */
+ string source = get_program_source(kernel_file);
+
+ string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
+ basename = path_cache_get(path_join("kernels", basename));
+ string clbin = basename + ".clbin";
+
+ /* If binary kernel exists already, try use it. */
+ if(path_exists(clbin) && load_binary(clbin)) {
+ /* Kernel loaded from binary, nothing to do. */
+ add_log(string("Loaded program from ") + clbin + ".", true);
+
+ /* Cache the program. */
+ device->store_cached_kernel(program,
+ cache_key,
+ cache_locker);
+ }
+ else {
+ add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
+ cache_locker.unlock();
+ }
+ }
+
+ if (program) {
+ create_kernels();
+ loaded = true;
+ needs_compiling = false;
+ }
+
+ return loaded;
+}
+
+void OpenCLDevice::OpenCLProgram::compile()
+{
+ assert(device);
string device_md5 = device->device_md5_hash(kernel_build_options);
@@ -562,12 +611,13 @@ void OpenCLDevice::OpenCLProgram::load()
program = device->load_cached_kernel(cache_key,
cache_locker);
- if(!program) {
+ if (!program)
+ {
+
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
/* need to create source to get md5 */
- string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
- source = path_source_replace_includes(source, path_get("source"));
+ string source = get_program_source(kernel_file);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
basename = path_cache_get(path_join("kernels", basename));
@@ -582,49 +632,38 @@ void OpenCLDevice::OpenCLProgram::load()
}
/* If binary kernel exists already, try use it. */
- if(path_exists(clbin) && load_binary(clbin)) {
- /* Kernel loaded from binary, nothing to do. */
- add_log(string("Loaded program from ") + clbin + ".", true);
+ if(compile_separate(clbin)) {
+ add_log(string("Built and loaded program from ") + clbin + ".", true);
+ loaded = true;
}
else {
- add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
- if(!path_exists(clbin)) {
- if(compile_separate(clbin)) {
- add_log(string("Built and loaded program from ") + clbin + ".", true);
- loaded = true;
- }
- else {
- add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
-
- /* If does not exist or loading binary failed, compile kernel. */
- if(!compile_kernel(debug_src)) {
- return;
- }
-
- /* Save binary for reuse. */
- if(!save_binary(clbin)) {
- add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
- }
- }
+ add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+ /* If does not exist or loading binary failed, compile kernel. */
+ if(!compile_kernel(debug_src)) {
+ needs_compiling = false;
+ return;
}
- else {
- add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
- /* Fall back to compiling. */
- if(!compile_kernel(debug_src)) {
- return;
- }
+
+ /* Save binary for reuse. */
+ if(!save_binary(clbin)) {
+ add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
}
}
/* Cache the program. */
device->store_cached_kernel(program,
- cache_key,
- cache_locker);
- }
- else {
- add_log(string("Found cached OpenCL program ") + program_name + ".", true);
+ cache_key,
+ cache_locker);
}
+ create_kernels();
+ needs_compiling = false;
+ loaded = true;
+}
+
+void OpenCLDevice::OpenCLProgram::create_kernels()
+{
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
assert(kernel->second == NULL);
cl_int ciErr;
@@ -635,8 +674,15 @@ void OpenCLDevice::OpenCLProgram::load()
return;
}
}
+}
- loaded = true;
+bool OpenCLDevice::OpenCLProgram::wait_for_availability()
+{
+ add_log(string("Waiting for availability of ") + program_name + ".", true);
+ while (needs_compiling) {
+ time_sleep(0.1);
+ }
+ return loaded;
}
void OpenCLDevice::OpenCLProgram::report_error()
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index caa0057d997..281d9a25047 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -121,52 +121,62 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL__
-/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
+# if defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__)
+# define __CL_USE_NATIVE__
+# endif
-# ifdef __KERNEL_OPENCL_NVIDIA__
-# define __KERNEL_SHADING__
-# define __KERNEL_ADV_SHADING__
-# define __SUBSURFACE__
-# define __PRINCIPLED__
-# define __VOLUME__
-# define __VOLUME_SCATTER__
-# define __SHADOW_RECORD_ALL__
-# define __CMJ__
-# define __BRANCHED_PATH__
-# endif /* __KERNEL_OPENCL_NVIDIA__ */
+/* Preview kernel is used as a small kernel when the optimized kernel is still being compiled. */
+# ifdef __KERNEL_OPENCL_PREVIEW__
+# define __AO__
+# define __PASSES__
+# define __HAIR__
+# else
+
+/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
-# ifdef __KERNEL_OPENCL_APPLE__
-# define __KERNEL_SHADING__
-# define __KERNEL_ADV_SHADING__
-# define __PRINCIPLED__
-# define __CMJ__
+# ifdef __KERNEL_OPENCL_NVIDIA__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# define __SUBSURFACE__
+# define __PRINCIPLED__
+# define __VOLUME__
+# define __VOLUME_SCATTER__
+# define __SHADOW_RECORD_ALL__
+# define __CMJ__
+# define __BRANCHED_PATH__
+# endif /* __KERNEL_OPENCL_NVIDIA__ */
+
+# ifdef __KERNEL_OPENCL_APPLE__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# define __PRINCIPLED__
+# define __CMJ__
/* TODO(sergey): Currently experimental section is ignored here,
* this is because megakernel in device_opencl does not support
* custom cflags depending on the scene features.
*/
-# endif /* __KERNEL_OPENCL_APPLE__ */
-
-# ifdef __KERNEL_OPENCL_AMD__
-# define __CL_USE_NATIVE__
-# define __KERNEL_SHADING__
-# define __KERNEL_ADV_SHADING__
-# define __SUBSURFACE__
-# define __PRINCIPLED__
-# define __VOLUME__
-# define __VOLUME_SCATTER__
-# define __SHADOW_RECORD_ALL__
-# define __CMJ__
-# define __BRANCHED_PATH__
-# endif /* __KERNEL_OPENCL_AMD__ */
-
-# ifdef __KERNEL_OPENCL_INTEL_CPU__
-# define __CL_USE_NATIVE__
-# define __KERNEL_SHADING__
-# define __KERNEL_ADV_SHADING__
-# define __PRINCIPLED__
-# define __CMJ__
-# endif /* __KERNEL_OPENCL_INTEL_CPU__ */
-
+# endif /* __KERNEL_OPENCL_APPLE__ */
+
+# ifdef __KERNEL_OPENCL_AMD__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# define __SUBSURFACE__
+# define __PRINCIPLED__
+# define __VOLUME__
+# define __VOLUME_SCATTER__
+# define __SHADOW_RECORD_ALL__
+# define __CMJ__
+# define __BRANCHED_PATH__
+# endif /* __KERNEL_OPENCL_AMD__ */
+
+# ifdef __KERNEL_OPENCL_INTEL_CPU__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# define __PRINCIPLED__
+# define __CMJ__
+# endif /* __KERNEL_OPENCL_INTEL_CPU__ */
+
+# endif /* __KERNEL_OPENCL_PREVIEW__ */
#endif /* __KERNEL_OPENCL__ */
/* Kernel features */
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 866832333eb..e9274fbf49e 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -212,6 +212,11 @@ void Session::run_gpu()
/* advance to next tile */
bool no_tiles = !tile_manager.next();
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
if(params.background) {
/* if no work left and in background mode, we can stop immediately */
if(no_tiles) {
@@ -219,6 +224,16 @@ void Session::run_gpu()
break;
}
}
+
+ /* Don't go into pause mode when the image was rendered with preview kernels.
+ * When feature kernels become available the session will be reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_gpu(tile_manager.params, params.samples);
+ }
+
else {
/* if in interactive mode, and we are either paused or done for now,
* wait for pause condition notify to wake up again */
@@ -540,6 +555,11 @@ void Session::run_cpu()
bool no_tiles = !tile_manager.next();
bool need_tonemap = false;
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
if(params.background) {
/* if no work left and in background mode, we can stop immediately */
if(no_tiles) {
@@ -547,6 +567,16 @@ void Session::run_cpu()
break;
}
}
+
+ /* Don't go into pause mode when preview kernels are used.
+ * When feature kernels become available the session will be reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_cpu(tile_manager.params, params.samples);
+ }
+
else {
/* if in interactive mode, and we are either paused or done for now,
* wait for pause condition notify to wake up again */
@@ -699,7 +729,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
return requested_features;
}
-void Session::load_kernels(bool lock_scene)
+bool Session::load_kernels(bool lock_scene)
{
thread_scoped_lock scene_lock;
if(lock_scene) {
@@ -722,7 +752,7 @@ void Session::load_kernels(bool lock_scene)
progress.set_error(message);
progress.set_status("Error", message);
progress.set_update();
- return;
+ return false;
}
progress.add_skip_time(timer, false);
@@ -730,14 +760,13 @@ void Session::load_kernels(bool lock_scene)
kernels_loaded = true;
loaded_kernel_features = requested_features;
+ return true;
}
+ return false;
}
void Session::run()
{
- /* load kernels */
- load_kernels();
-
if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
profiler.start();
}
@@ -879,7 +908,7 @@ bool Session::update_scene()
/* update scene */
if(scene->need_update()) {
- load_kernels(false);
+ bool new_kernels_needed = load_kernels(false);
/* Update max_closures. */
KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
@@ -894,6 +923,21 @@ bool Session::update_scene()
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
+ DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
+ bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
+ kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+ if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ progress.set_kernel_status("Compiling render kernels");
+ }
+ if (new_kernels_needed || kernel_switch_needed) {
+ progress.set_kernel_status("Compiling render kernels");
+ device->wait_for_availability(loaded_kernel_features);
+ progress.set_kernel_status("");
+ }
+
+ if (kernel_switch_needed) {
+ reset(tile_manager.params, params.samples);
+ }
return true;
}
return false;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index cbdfc75a905..404b7b7a945 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -162,7 +162,7 @@ public:
void set_pause(bool pause);
bool update_scene();
- void load_kernels(bool lock_scene=true);
+ bool load_kernels(bool lock_scene=true);
void device_free();
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 4ed9ebd60ff..06900d14cdc 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -46,6 +46,7 @@ public:
substatus = "";
sync_status = "";
sync_substatus = "";
+ kernel_status = "";
update_cb = function_null;
cancel = false;
cancel_message = "";
@@ -86,6 +87,7 @@ public:
substatus = "";
sync_status = "";
sync_substatus = "";
+ kernel_status = "";
cancel = false;
cancel_message = "";
error = false;
@@ -313,6 +315,25 @@ public:
}
}
+
+ /* kernel status */
+
+ void set_kernel_status(const string &kernel_status_)
+ {
+ {
+ thread_scoped_lock lock(progress_mutex);
+ kernel_status = kernel_status_;
+ }
+
+ set_update();
+ }
+
+ void get_kernel_status(string &kernel_status_)
+ {
+ thread_scoped_lock lock(progress_mutex);
+ kernel_status_ = kernel_status;
+ }
+
/* callback */
void set_update()
@@ -356,6 +377,8 @@ protected:
string sync_status;
string sync_substatus;
+ string kernel_status;
+
volatile bool cancel;
string cancel_message;
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 2a705c2432b..ce166af206a 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -148,6 +148,12 @@ bool TaskPool::canceled()
return do_cancel;
}
+bool TaskPool::finished()
+{
+ thread_scoped_lock num_lock(num_mutex);
+ return num == 0;
+}
+
void TaskPool::num_decrease(int done)
{
num_mutex.lock();
diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h
index 15f0d341be7..a7e19d1ab75 100644
--- a/intern/cycles/util/util_task.h
+++ b/intern/cycles/util/util_task.h
@@ -93,6 +93,7 @@ public:
void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker threads running */
void stop(); /* stop all worker threads */
+ bool finished(); /* check if all work has been completed */
bool canceled(); /* for worker threads, test if canceled */