Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2013-05-27 20:21:07 +0400
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2013-05-27 20:21:07 +0400
commit4bdb54a76e3b15f99f2efc149b9d78aeef3203a4 (patch)
tree24defa9eeb8e4d7d04508fbca6229ffe23e13d9b /intern
parent5460e31d56fda5957cf13f77c910d378d4bd85fb (diff)
Cycles OpenCL: patch #35514 by Doug Gale
* Support using devices from all OpenCL platforms, so that you can use e.g. both Intel and NVidia OpenCL implementations if you have them installed. * Fix compile error due to missing fmodf after recent math node change. * Enable advanced shading for Intel OpenCL. * CYCLES_OPENCL_DEBUG environment variable for generating debug symbols so you can debug with gdb. This crashes the compiler with Intel OpenCL on Linux though. To make this work the preprocessed kernel source code is written out, as gdb needs this. * Show OpenCL compiler warnings even if the build succeeded. * Some small fixes to initialize cdDevice to NULL, add missing NULL check when creating buffer and add missing space at end of build options for Apple OpenCL. * Fix crash with multi device + opencl, now e.g. CPU + GPU render should work. I did a few tweaks to the code and also: * Fix viewport render failing sometimes with Apple CPU OpenCL, was not taking workgroup size limits into account properly. * Add compile error when advanced shading in the Blender binary and OpenCL kernel are not in sync.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device_opencl.cpp349
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h1
-rw-r--r--intern/cycles/kernel/kernel_types.h16
-rw-r--r--intern/cycles/util/util_path.cpp10
-rw-r--r--intern/cycles/util/util_path.h1
5 files changed, 246 insertions, 131 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 4608c06c3d5..8e14c281155 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -38,7 +38,7 @@
CCL_NAMESPACE_BEGIN
-#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
+#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
static cl_device_type opencl_device_type()
{
@@ -57,7 +57,57 @@ static cl_device_type opencl_device_type()
return CL_DEVICE_TYPE_ACCELERATOR;
}
- return CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+ return CL_DEVICE_TYPE_ALL;
+}
+
+static bool opencl_kernel_use_debug()
+{
+ return (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+}
+
+static bool opencl_kernel_use_advanced_shading(const string& platform)
+{
+ /* keep this in sync with kernel_types.h! */
+ if(platform == "NVIDIA CUDA")
+ return false;
+ else if(platform == "Apple")
+ return false;
+ else if(platform == "AMD Accelerated Parallel Processing")
+ return false;
+ else if(platform == "Intel(R) OpenCL")
+ return true;
+
+ return false;
+}
+
+static string opencl_kernel_build_options(const string& platform, const string *debug_src = NULL)
+{
+ string build_options = " -cl-fast-relaxed-math ";
+
+ if(platform == "NVIDIA CUDA")
+ build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+
+ else if(platform == "Apple")
+ build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes ";
+
+ else if(platform == "AMD Accelerated Parallel Processing")
+ build_options += "-D__KERNEL_OPENCL_AMD__ ";
+
+ else if(platform == "Intel(R) OpenCL") {
+ build_options += "-D__KERNEL_OPENCL_INTEL_CPU__";
+
+ /* options for gdb source level kernel debugging. this segfaults on linux currently */
+ if(opencl_kernel_use_debug() && debug_src)
+ build_options += "-g -s \"" + *debug_src + "\"";
+ }
+
+ if(opencl_kernel_use_debug())
+ build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
+
+ if (opencl_kernel_use_advanced_shading(platform))
+ build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ ";
+
+ return build_options;
}
class OpenCLDevice : public Device
@@ -72,9 +122,14 @@ public:
cl_kernel ckPathTraceKernel;
cl_kernel ckFilmConvertKernel;
cl_int ciErr;
- map<string, device_vector<uchar>*> const_mem_map;
- map<string, device_memory*> mem_map;
+
+ typedef map<string, device_vector<uchar>*> ConstMemMap;
+ typedef map<string, device_ptr> MemMap;
+
+ ConstMemMap const_mem_map;
+ MemMap mem_map;
device_ptr null_mem;
+
bool device_initialized;
string platform_name;
@@ -169,6 +224,7 @@ public:
{
background = background_;
cpPlatform = NULL;
+ cdDevice = NULL;
cxContext = NULL;
cqCommandQueue = NULL;
cpProgram = NULL;
@@ -189,38 +245,64 @@ public:
return;
}
- ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
+ vector<cl_platform_id> platforms(num_platforms, NULL);
+
+ ciErr = clGetPlatformIDs(num_platforms, &platforms[0], NULL);
if(opencl_error(ciErr))
return;
- char name[256];
- clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
- platform_name = name;
+ int num_base = 0;
+ int total_devices = 0;
- /* get devices */
- vector<cl_device_id> device_ids;
- cl_uint num_devices;
+ for (int platform = 0; platform < num_platforms; platform++) {
+ cl_uint num_devices;
- if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
- return;
+ if(opencl_error(clGetDeviceIDs(platforms[platform], opencl_device_type(), 0, NULL, &num_devices)))
+ return;
- if(info.num > num_devices) {
- if(num_devices == 0)
- opencl_error("OpenCL: no devices found.");
- else
- opencl_error("OpenCL: specified device not found.");
- return;
+ total_devices += num_devices;
+
+ if(info.num - num_base >= num_devices) {
+ /* num doesn't refer to a device in this platform */
+ num_base += num_devices;
+ continue;
+ }
+
+ /* device is in this platform */
+ cpPlatform = platforms[platform];
+
+ /* get devices */
+ vector<cl_device_id> device_ids(num_devices, NULL);
+
+ if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+ return;
+
+ cdDevice = device_ids[info.num - num_base];
+
+ char name[256];
+ clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+ platform_name = name;
+
+ break;
}
- device_ids.resize(num_devices);
-
- if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+ if(total_devices == 0) {
+ opencl_error("OpenCL: no devices found.");
return;
+ }
+ else if (!cdDevice) {
+ opencl_error("OpenCL: specified device not found.");
+ return;
+ }
- cdDevice = device_ids[info.num];
+ /* Create context properties array to specify platform */
+ const cl_context_properties context_props[] = {
+ CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
+ 0, 0
+ };
/* create context */
- cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
+ cxContext = clCreateContext(context_props, 1, &cdDevice, NULL, NULL, &ciErr);
if(opencl_error(ciErr))
return;
@@ -229,6 +311,9 @@ public:
return;
null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
+ if(opencl_error(ciErr))
+ return;
+
device_initialized = true;
}
@@ -265,7 +350,7 @@ public:
return true;
}
- bool load_binary(const string& kernel_path, const string& clbin)
+ bool load_binary(const string& kernel_path, const string& clbin, const string *debug_src = NULL)
{
/* read binary into memory */
vector<uint8_t> binary;
@@ -288,7 +373,7 @@ public:
return false;
}
- if(!build_kernel(kernel_path))
+ if(!build_kernel(kernel_path, debug_src))
return false;
return true;
@@ -315,51 +400,35 @@ public:
return true;
}
- string kernel_build_options()
- {
- string build_options = " -cl-fast-relaxed-math ";
-
- if(platform_name == "NVIDIA CUDA")
- build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
-
- else if(platform_name == "Apple")
- build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes";
-
- else if(platform_name == "AMD Accelerated Parallel Processing")
- build_options += "-D__KERNEL_OPENCL_AMD__ ";
-
- return build_options;
- }
-
- bool build_kernel(const string& kernel_path)
+ bool build_kernel(const string& kernel_path, const string *debug_src = NULL)
{
- string build_options = kernel_build_options();
+ string build_options = opencl_kernel_build_options(platform_name, debug_src);
ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
- if(ciErr != CL_SUCCESS) {
- /* show build errors */
- char *build_log;
- size_t ret_val_size;
+ /* show warnings even if build is successful */
+ size_t ret_val_size = 0;
- clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+ clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
- build_log = new char[ret_val_size+1];
- clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
+ if(ret_val_size > 1) {
+ vector<char> build_log(ret_val_size+1);
+ clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
build_log[ret_val_size] = '\0';
- opencl_error("OpenCL build failed: errors in console");
- fprintf(stderr, "%s\n", build_log);
-
- delete[] build_log;
+ fprintf(stderr, "OpenCL kernel build output:\n");
+ fprintf(stderr, "%s\n", &build_log[0]);
+ }
+ if(ciErr != CL_SUCCESS) {
+ opencl_error("OpenCL build failed: errors in console");
return false;
}
return true;
}
- bool compile_kernel(const string& kernel_path, const string& kernel_md5)
+ bool compile_kernel(const string& kernel_path, const string& kernel_md5, const string *debug_src = NULL)
{
/* we compile kernels consisting of many files. unfortunately opencl
* kernel caches do not seem to recognize changes in included files.
@@ -367,6 +436,9 @@ public:
string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
source = path_source_replace_includes(source, kernel_path);
+ if (debug_src)
+ path_write_text(*debug_src, source);
+
size_t source_len = source.size();
const char *source_str = source.c_str();
@@ -378,7 +450,7 @@ public:
double starttime = time_dt();
printf("Compiling OpenCL kernel ...\n");
- if(!build_kernel(kernel_path))
+ if(!build_kernel(kernel_path, debug_src))
return false;
printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@@ -401,7 +473,7 @@ public:
md5.append((uint8_t*)name, strlen(name));
md5.append((uint8_t*)driver, strlen(driver));
- string options = kernel_build_options();
+ string options = opencl_kernel_build_options(platform_name);
md5.append((uint8_t*)options.c_str(), options.size());
return md5.get_hex();
@@ -424,18 +496,26 @@ public:
string kernel_md5 = path_files_md5_hash(kernel_path);
string device_md5 = device_md5_hash();
- /* try to use cache binary */
+ /* path to cached binary */
string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
clbin = path_user_get(path_join("cache", clbin));
- if(path_exists(clbin)) {
- /* if exists already, try use it */
- if(!load_binary(kernel_path, clbin))
- return false;
+ /* path to preprocessed source for debugging */
+ string clsrc, *debug_src = NULL;
+
+ if (opencl_kernel_use_debug()) {
+ clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
+ clsrc = path_user_get(path_join("cache", clsrc));
+ debug_src = &clsrc;
+ }
+
+ /* if exists already, try use it */
+ if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) {
+ /* kernel loaded from binary */
}
else {
- /* compile kernel */
- if(!compile_kernel(kernel_path, kernel_md5))
+ /* if does not exist or loading binary failed, compile kernel */
+ if(!compile_kernel(kernel_path, kernel_md5, debug_src))
return false;
/* save binary for reuse */
@@ -461,7 +541,7 @@ public:
if(null_mem)
clReleaseMemObject(CL_MEM_PTR(null_mem));
- map<string, device_vector<uchar>*>::iterator mt;
+ ConstMemMap::iterator mt;
for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
mem_free(*(mt->second));
delete mt->second;
@@ -533,26 +613,29 @@ public:
void const_copy_to(const char *name, void *host, size_t size)
{
- if(const_mem_map.find(name) == const_mem_map.end()) {
+ ConstMemMap::iterator i = const_mem_map.find(name);
+
+ if(i == const_mem_map.end()) {
device_vector<uchar> *data = new device_vector<uchar>();
data->copy((uchar*)host, size);
mem_alloc(*data, MEM_READ_ONLY);
- const_mem_map[name] = data;
+ i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
}
else {
- device_vector<uchar> *data = const_mem_map[name];
+ device_vector<uchar> *data = i->second;
data->copy((uchar*)host, size);
}
- mem_copy_to(*const_mem_map[name]);
+ mem_copy_to(*i->second);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
- mem_map[name] = &mem;
+ assert(mem_map.find(name) == mem_map.end());
+ mem_map.insert(MemMap::value_type(name, mem.device_pointer));
}
void tex_free(device_memory& mem)
@@ -567,6 +650,33 @@ public:
return global_size + ((r == 0)? 0: group_size - r);
}
+ void enqueue_kernel(cl_kernel kernel, size_t w, size_t h)
+ {
+ size_t workgroup_size, max_work_items[3];
+
+ clGetKernelWorkGroupInfo(kernel, cdDevice,
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
+ clGetDeviceInfo(cdDevice,
+ CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
+
+ /* try to divide evenly over 2 dimensions */
+ size_t sqrt_workgroup_size = max(sqrt((double)workgroup_size), 1.0);
+ size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
+
+ /* some implementations have max size 1 on 2nd dimension */
+ if (local_size[1] > max_work_items[1]) {
+ local_size[0] = workgroup_size/max_work_items[1];
+ local_size[1] = max_work_items[1];
+ }
+
+ size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
+
+ /* run kernel */
+ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
+ opencl_assert(ciErr);
+ opencl_assert(clFinish(cqCommandQueue));
+ }
+
void path_trace(RenderTile& rtile, int sample)
{
/* cast arguments to cl types */
@@ -582,7 +692,7 @@ public:
cl_int d_stride = rtile.stride;
/* sample arguments */
- int narg = 0;
+ cl_uint narg = 0;
ciErr = 0;
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
@@ -603,31 +713,17 @@ public:
opencl_assert(ciErr);
- size_t workgroup_size;
-
- clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice,
- CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
-
- workgroup_size = max(sqrt((double)workgroup_size), 1.0);
-
- size_t local_size[2] = {workgroup_size, workgroup_size};
- size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
-
- /* run kernel */
- ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
- opencl_assert(ciErr);
- opencl_assert(clFinish(cqCommandQueue));
+ enqueue_kernel(ckPathTraceKernel, d_w, d_h);
}
- cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
+ cl_int set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
{
cl_mem ptr;
cl_int err = 0;
- if(mem_map.find(name) != mem_map.end()) {
- device_memory *mem = mem_map[name];
-
- ptr = CL_MEM_PTR(mem->device_pointer);
+ MemMap::iterator i = mem_map.find(name);
+ if(i != mem_map.end()) {
+ ptr = CL_MEM_PTR(i->second);
}
else {
/* work around NULL not working, even though the spec says otherwise */
@@ -655,7 +751,7 @@ public:
cl_int d_stride = task.stride;
/* sample arguments */
- int narg = 0;
+ cl_uint narg = 0;
ciErr = 0;
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
@@ -676,20 +772,7 @@ public:
opencl_assert(ciErr);
- size_t workgroup_size;
-
- clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice,
- CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
-
- workgroup_size = max(sqrt((double)workgroup_size), 1.0);
-
- size_t local_size[2] = {workgroup_size, workgroup_size};
- size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
-
- /* run kernel */
- ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
- opencl_assert(ciErr);
- opencl_assert(clFinish(cqCommandQueue));
+ enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
}
void thread_run(DeviceTask *task)
@@ -769,34 +852,44 @@ void device_opencl_info(vector<DeviceInfo>& devices)
if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS)
return;
- if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
- return;
-
- device_ids.resize(num_devices);
+ /* devices are numbered consecutively across platforms */
+ int num_base = 0;
- if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
- return;
-
- /* add devices */
- for(int num = 0; num < num_devices; num++) {
- cl_device_id device_id = device_ids[num];
- char name[1024] = "\0";
+ for (int platform = 0; platform < num_platforms; platform++, num_base += num_devices) {
+ num_devices = 0;
+ if(clGetDeviceIDs(platform_ids[platform], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
+ continue;
- if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS)
+ device_ids.resize(num_devices);
+
+ if(clGetDeviceIDs(platform_ids[platform], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
continue;
- DeviceInfo info;
+ char pname[256];
+ clGetPlatformInfo(platform_ids[platform], CL_PLATFORM_NAME, sizeof(pname), &pname, NULL);
+ string platform_name = pname;
+
+ /* add devices */
+ for(int num = 0; num < num_devices; num++) {
+ cl_device_id device_id = device_ids[num];
+ char name[1024] = "\0";
- info.type = DEVICE_OPENCL;
- info.description = string(name);
- info.id = string_printf("OPENCL_%d", num);
- info.num = num;
- /* we don't know if it's used for display, but assume it is */
- info.display_device = true;
- info.advanced_shading = false;
- info.pack_images = true;
+ if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS)
+ continue;
- devices.push_back(info);
+ DeviceInfo info;
+
+ info.type = DEVICE_OPENCL;
+ info.description = string(name);
+ info.num = num_base + num;
+ info.id = string_printf("OPENCL_%d", info.num);
+ /* we don't know if it's used for display, but assume it is */
+ info.display_device = true;
+ info.advanced_shading = opencl_kernel_use_advanced_shading(platform_name);
+ info.pack_images = true;
+
+ devices.push_back(info);
+ }
}
}
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 6c41bfa5521..66cf0bb996b 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -103,6 +103,7 @@
#define atan2f(x, y) atan2(((float)x), ((float)y))
#define fmaxf(x, y) fmax(((float)x), ((float)y))
#define fminf(x, y) fmin(((float)x), ((float)y))
+#define fmodf(x, y) fmod((float)x, (float)y)
/* data lookup defines */
#define kernel_data (*kg->data)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 535b9489985..1dcd3a52b6a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -66,9 +66,11 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL__
+/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
+
#ifdef __KERNEL_OPENCL_NVIDIA__
#define __KERNEL_SHADING__
-#define __MULTI_CLOSURE__
+//#define __KERNEL_ADV_SHADING__
#endif
#ifdef __KERNEL_OPENCL_APPLE__
@@ -85,6 +87,11 @@ CCL_NAMESPACE_BEGIN
#define __EXTRA_NODES__
#endif
+#ifdef __KERNEL_OPENCL_INTEL_CPU__
+#define __KERNEL_SHADING__
+#define __KERNEL_ADV_SHADING__
+#endif
+
#endif
/* kernel features */
@@ -122,7 +129,12 @@ CCL_NAMESPACE_BEGIN
#define __OBJECT_MOTION__
#define __HAIR__
#endif
-//#define __SOBOL_FULL_SCREEN__
+
+/* Sanity check */
+
+#if defined(__KERNEL_OPENCL_NEED_ADVANCED_SHADING__) && !defined(__MULTI_CLOSURE__)
+#error "OpenCL: mismatch between advanced shading flags in device_opencl.cpp and kernel_types.h"
+#endif
/* Shader Evaluation */
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 8cf23bc6a76..79062fe251f 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -145,6 +145,14 @@ bool path_write_binary(const string& path, const vector<uint8_t>& binary)
return true;
}
+bool path_write_text(const string& path, string& text)
+{
+ vector<uint8_t> binary(text.length(), 0);
+ std::copy(text.begin(), text.end(), binary.begin());
+
+ return path_write_binary(path, binary);
+}
+
bool path_read_binary(const string& path, vector<uint8_t>& binary)
{
binary.resize(boost::filesystem::file_size(path));
@@ -176,7 +184,7 @@ bool path_read_text(const string& path, string& text)
if(!path_exists(path) || !path_read_binary(path, binary))
return false;
-
+
const char *str = (const char*)&binary[0];
size_t size = binary.size();
text = string(str, size);
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index 89e4452ecd9..d5257e79c05 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -44,6 +44,7 @@ string path_files_md5_hash(const string& dir);
void path_create_directories(const string& path);
bool path_write_binary(const string& path, const vector<uint8_t>& binary);
+bool path_write_text(const string& path, string& text);
bool path_read_binary(const string& path, vector<uint8_t>& binary);
bool path_read_text(const string& path, string& text);