Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brecht Van Lommel <brechtvanlommel@gmail.com> 2019-02-15 10:18:38 +0300
committer Jeroen Bakker <j.bakker@atmind.nl> 2019-02-15 10:49:25 +0300
commit 4ce9785e01587638ae26256fe23315e436c658ff (patch)
tree 338144aec436dc68a6fdd9240786988dd3f8705e /intern/cycles/device
parent d17a0270344101c4cd6e5026ea8239240868d24f (diff)
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working. So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function. Depends on D2231. Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97 Reviewed By: brecht Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli Differential Revision: https://developer.blender.org/D2264
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- intern/cycles/device/device_intern.h 1
-rw-r--r-- intern/cycles/device/opencl/opencl.h 20
-rw-r--r-- intern/cycles/device/opencl/opencl_base.cpp 20
-rw-r--r-- intern/cycles/device/opencl/opencl_mega.cpp 24
-rw-r--r-- intern/cycles/device/opencl/opencl_split.cpp 171
-rw-r--r-- intern/cycles/device/opencl/opencl_util.cpp 140
6 files changed, 331 insertions, 45 deletions
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 0b26057c3ba..94df1e009eb 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -24,6 +24,7 @@ class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 9b763167459..a2c0e53b3e7 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -268,6 +268,7 @@ public:
cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_int ciErr;
+ int device_num;
class OpenCLProgram {
public:
@@ -293,7 +294,15 @@ public:
private:
bool build_kernel(const string *debug_src);
+ /* Build the program by launching a separate instance of the running executable.
+ * This is required for multithreaded OpenCL compilation, since most frameworks serialize
+ * build calls internally if they come from the same process.
+ * If that is not supported, this function just returns false.
+ */
+ bool compile_separate(const string& clbin);
+ /* Build the program by calling OpenCL directly. */
bool compile_kernel(const string *debug_src);
+ /* Loading and saving the program from/to disk. */
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
@@ -342,12 +351,17 @@ public:
bool opencl_version_check();
string device_md5_hash(string kernel_custom_build_options = "");
- bool load_kernels(const DeviceRequestedFeatures& requested_features);
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
/* Has to be implemented by the real device classes.
* The base device will then load all these programs. */
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
- vector<OpenCLProgram*> &programs) = 0;
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs) = 0;
+
+ /* Get the name of the opencl program for the given kernel */
+ virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+ /* Get the program file name to compile (*.cl) for the given kernel */
+ virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
void mem_alloc(device_memory& mem);
void mem_copy_to(device_memory& mem);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 4417065bb7f..d8f9a242ac8 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
}
assert(info.num < usable_devices.size());
OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+ device_num = info.num;
cpPlatform = platform_device.platform_id;
cdDevice = platform_device.device_id;
platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
texture_info.resize(1);
memory_manager.alloc("texture_info", texture_info);
- fprintf(stderr, "Device init success\n");
device_initialized = true;
}
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
programs.push_back(&base_program);
programs.push_back(&denoising_program);
/* Call actual class to fill the vector with its programs. */
- if(!load_kernels(requested_features, programs)) {
+ if(!add_kernel_programs(requested_features, programs)) {
return false;
}
- /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
- * serialize the calls internally, so it's not much use right now.
- * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
- * should be set to false as well. */
-#if 0
+ /* Parallel compilation of Cycles kernels: this launches multiple
+ * processes to work around OpenCL frameworks serializing the calls
+ * internally within a single process. */
TaskPool task_pool;
foreach(OpenCLProgram *program, programs) {
task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
return false;
}
}
-#else
- foreach(OpenCLProgram *program, programs) {
- program->load();
- if(!program->is_loaded()) {
- return false;
- }
- }
-#endif
return true;
}
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 0a7bf96fed7..c0b9e81d4d3 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -35,19 +35,35 @@ public:
OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
: OpenCLDeviceBase(info, stats, profiler, background_),
- path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+ path_trace_program(this,
+ get_opencl_program_name(false, "megakernel"),
+ get_opencl_program_filename(false, "megakernel"),
+ "-D__COMPILE_ONLY_MEGAKERNEL__ ")
{
}
- virtual bool show_samples() const {
+
+ virtual bool show_samples() const
+ {
return true;
}
- virtual BVHLayoutMask get_bvh_layout_mask() const {
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+ const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+ {
+ return kernel_name;
+ }
+
+ const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+ {
+ return "kernel.cl";
+ }
+
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 5a2555f9f80..b759f69d3ab 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -79,6 +79,27 @@ public:
OpenCLProgram program_data_init;
OpenCLProgram program_state_buffer_size;
+ OpenCLProgram program_split;
+
+ OpenCLProgram program_path_init;
+ OpenCLProgram program_scene_intersect;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_queue_enqueue;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_setup;
+ OpenCLProgram program_shader_sort;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+ OpenCLProgram program_enqueue_inactive;
+ OpenCLProgram program_next_iteration_setup;
+ OpenCLProgram program_indirect_subsurface;
+ OpenCLProgram program_buffer_update;
+
OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features)
+ {
+ if (!OpenCLDeviceBase::load_kernels(requested_features)) {
+ return false;
+ }
+ return split_kernel->load_kernels(requested_features);
+ }
+
+ const string fast_compiled_kernels =
+ "path_init "
+ "scene_intersect "
+ "queue_enqueue "
+ "shader_setup "
+ "shader_sort "
+ "enqueue_inactive "
+ "next_iteration_setup "
+ "indirect_subsurface "
+ "buffer_update";
+
+ const string get_opencl_program_name(bool single_program, const string& kernel_name)
+ {
+ if (single_program) {
+ return "split";
+ }
+ else {
+ if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ return "split_bundle";
+ }
+ else {
+ return "split_" + kernel_name;
+ }
+ }
+ }
+
+ const string get_opencl_program_filename(bool single_program, const string& kernel_name)
+ {
+ if (single_program) {
+ return "kernel_split.cl";
+ }
+ else {
+ if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+ return "kernel_split_bundle.cl";
+ }
+ else {
+ return "kernel_" + kernel_name + ".cl";
+ }
+ }
+ }
+
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
{
bool single_program = OpenCLInfo::use_single_program();
- program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_data_init",
- single_program ? "kernel_split.cl" : "kernel_data_init.cl",
- get_build_options(this, requested_features));
-
+ program_data_init = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "data_init"),
+ get_opencl_program_filename(single_program, "data_init"),
+ get_build_options(this, requested_features));
program_data_init.add_kernel(ustring("path_trace_data_init"));
programs.push_back(&program_data_init);
- program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
- single_program ? "split" : "split_state_buffer_size",
- single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl",
- get_build_options(this, requested_features));
+ program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ get_opencl_program_name(single_program, "state_buffer_size"),
+ get_opencl_program_filename(single_program, "state_buffer_size"),
+ get_build_options(this, requested_features));
+
program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
programs.push_back(&program_state_buffer_size);
- return split_kernel->load_kernels(requested_features);
+
+#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
+ program_##kernel_name = \
+ OpenCLDeviceBase::OpenCLProgram(this, \
+ "split_"#kernel_name, \
+ "kernel_"#kernel_name".cl", \
+ get_build_options(this, requested_features)); \
+ program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+ programs.push_back(&program_##kernel_name);
+
+ if (single_program) {
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split" ,
+ "kernel_split.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+
+ programs.push_back(&program_split);
+ }
+ else {
+ /* Ordered with most complex kernels first, to reduce overall compile time. */
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background);
+ ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval);
+
+ /* Quick kernels bundled in a single program to reduce overhead of starting
+ * Blender processes. */
+ program_split = OpenCLDeviceBase::OpenCLProgram(
+ this,
+ "split_bundle" ,
+ "kernel_split_bundle.cl",
+ get_build_options(this, requested_features));
+
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+ ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+ programs.push_back(&program_split);
+ }
+#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM
+#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM
+
+ return true;
}
void thread_run(DeviceTask *task)
@@ -281,8 +426,8 @@ public:
bool single_program = OpenCLInfo::use_single_program();
kernel->program =
OpenCLDeviceBase::OpenCLProgram(device,
- single_program ? "split" : "split_" + kernel_name,
- single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl",
+ device->get_opencl_program_name(single_program, kernel_name),
+ device->get_opencl_program_filename(single_program, kernel_name),
get_build_options(device, requested_features));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index f43aa5f350a..fe5ba4886a9 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -17,12 +17,14 @@
#ifdef WITH_OPENCL
#include "device/opencl/opencl.h"
+#include "device/device_intern.h"
#include "util/util_debug.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_path.h"
#include "util/util_time.h"
+#include "util/util_system.h"
using std::cerr;
using std::endl;
@@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
}
double starttime = time_dt();
- add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
+ add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
add_log(string("Build flags: ") + kernel_build_options, true);
if(!build_kernel(debug_src))
return false;
- add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
+ double elapsed = time_dt() - starttime;
+ add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
return true;
}
+bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin)
+{
+ vector<string> args;
+ args.push_back("--background");
+ args.push_back("--factory-startup");
+ args.push_back("--python-expr");
+
+ args.push_back(
+ string_printf(
+ "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')",
+ (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false",
+ device->device_num,
+ device->device_name.c_str(),
+ device->platform_name.c_str(),
+ (device->kernel_build_options(NULL) + kernel_build_options).c_str(),
+ kernel_file.c_str(),
+ clbin.c_str()));
+
+ double starttime = time_dt();
+ add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
+ add_log(string("Build flags: ") + kernel_build_options, true);
+ if(!system_call_self(args) || !path_exists(clbin)) {
+ return false;
+ }
+
+ double elapsed = time_dt() - starttime;
+ add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
+
+ return load_binary(clbin);
+}
+
+/* Compile an OpenCL kernel. This function is called from the _cycles Python
+ * module to compile kernels. Parameters must match the function above. */
+bool device_opencl_compile_kernel(const vector<string>& parameters)
+{
+ bool force_all_platforms = parameters[0] == "true";
+ int device_platform_id = std::stoi(parameters[1]);
+ const string& device_name = parameters[2];
+ const string& platform_name = parameters[3];
+ const string& build_options = parameters[4];
+ const string& kernel_file = parameters[5];
+ const string& binary_path = parameters[6];
+
+ if(clewInit() != CLEW_SUCCESS) {
+ return false;
+ }
+
+ vector<OpenCLPlatformDevice> usable_devices;
+ OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms);
+ if(device_platform_id >= usable_devices.size()) {
+ return false;
+ }
+
+ OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
+ if(platform_device.platform_name != platform_name ||
+ platform_device.device_name != device_name)
+ {
+ return false;
+ }
+
+ cl_platform_id platform = platform_device.platform_id;
+ cl_device_id device = platform_device.device_id;
+ const cl_context_properties context_props[] = {
+ CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
+ 0, 0
+ };
+
+ cl_int err;
+ cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
+ if(err != CL_SUCCESS) {
+ return false;
+ }
+
+ string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
+ source = path_source_replace_includes(source, path_get("source"));
+ size_t source_len = source.size();
+ const char *source_str = source.c_str();
+ cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
+ bool result = false;
+
+ if(err == CL_SUCCESS) {
+ err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
+
+ if(err == CL_SUCCESS) {
+ size_t size = 0;
+ clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+ if(size > 0) {
+ vector<uint8_t> binary(size);
+ uint8_t *bytes = &binary[0];
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+ result = path_write_binary(binary_path, binary);
+ }
+ }
+ clReleaseProgram(program);
+ }
+
+ clReleaseContext(context);
+
+ return result;
+}
+
bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
const string *debug_src)
{
@@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load()
}
else {
add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-
- /* If does not exist or loading binary failed, compile kernel. */
- if(!compile_kernel(debug_src)) {
- return;
+ if(!path_exists(clbin)) {
+ if(compile_separate(clbin)) {
+ add_log(string("Built and loaded program from ") + clbin + ".", true);
+ loaded = true;
+ }
+ else {
+ add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+ /* If does not exist or loading binary failed, compile kernel. */
+ if(!compile_kernel(debug_src)) {
+ return;
+ }
+
+ /* Save binary for reuse. */
+ if(!save_binary(clbin)) {
+ add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+ }
+ }
}
-
- /* Save binary for reuse. */
- if(!save_binary(clbin)) {
- add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+ else {
+ add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
+ /* Fall back to compiling. */
+ if(!compile_kernel(debug_src)) {
+ return;
+ }
}
}