Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2019-04-17 07:17:24 +0300
committerCampbell Barton <ideasman42@gmail.com>2019-04-17 07:21:24 +0300
commite12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/device/opencl/opencl.h
parentb3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211. For details on usage and instructions for migrating branches without conflicts, see: https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/cycles/device/opencl/opencl.h')
-rw-r--r--intern/cycles/device/opencl/opencl.h1222
1 files changed, 609 insertions, 613 deletions
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 89761293638..e7bafa0b8a8 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -16,645 +16,641 @@
#ifdef WITH_OPENCL
-#include "device/device.h"
-#include "device/device_denoising.h"
-#include "device/device_split_kernel.h"
+# include "device/device.h"
+# include "device/device_denoising.h"
+# include "device/device_split_kernel.h"
-#include "util/util_map.h"
-#include "util/util_param.h"
-#include "util/util_string.h"
+# include "util/util_map.h"
+# include "util/util_param.h"
+# include "util/util_string.h"
-#include "clew.h"
+# include "clew.h"
-#include "device/opencl/memory_manager.h"
+# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
/* Disable workarounds, seems to be working fine on latest drivers. */
-#define CYCLES_DISABLE_DRIVER_WORKAROUNDS
+# define CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */
-#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
+# ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
-# undef clEnqueueNDRangeKernel
-# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
- CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
- clFinish(a);
+# undef clEnqueueNDRangeKernel
+# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
+ CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
+ clFinish(a);
-# undef clEnqueueWriteBuffer
-# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
- CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
- clFinish(a);
+# undef clEnqueueWriteBuffer
+# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
+ CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
+ clFinish(a);
-# undef clEnqueueReadBuffer
-# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
- CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
- clFinish(a);
-#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
+# undef clEnqueueReadBuffer
+# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
+ CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
+ clFinish(a);
+# endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
-#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
+# define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
struct OpenCLPlatformDevice {
- OpenCLPlatformDevice(cl_platform_id platform_id,
- const string& platform_name,
- cl_device_id device_id,
- cl_device_type device_type,
- const string& device_name,
- const string& hardware_id,
- const string& device_extensions)
- : platform_id(platform_id),
- platform_name(platform_name),
- device_id(device_id),
- device_type(device_type),
- device_name(device_name),
- hardware_id(hardware_id),
- device_extensions(device_extensions) {}
- cl_platform_id platform_id;
- string platform_name;
- cl_device_id device_id;
- cl_device_type device_type;
- string device_name;
- string hardware_id;
- string device_extensions;
+ OpenCLPlatformDevice(cl_platform_id platform_id,
+ const string &platform_name,
+ cl_device_id device_id,
+ cl_device_type device_type,
+ const string &device_name,
+ const string &hardware_id,
+ const string &device_extensions)
+ : platform_id(platform_id),
+ platform_name(platform_name),
+ device_id(device_id),
+ device_type(device_type),
+ device_name(device_name),
+ hardware_id(hardware_id),
+ device_extensions(device_extensions)
+ {
+ }
+ cl_platform_id platform_id;
+ string platform_name;
+ cl_device_id device_id;
+ cl_device_type device_type;
+ string device_name;
+ string hardware_id;
+ string device_extensions;
};
/* Contains all static OpenCL helper functions. */
-class OpenCLInfo
-{
-public:
- static cl_device_type device_type();
- static bool use_debug();
- static bool device_supported(const string& platform_name,
- const cl_device_id device_id);
- static bool platform_version_check(cl_platform_id platform,
- string *error = NULL);
- static bool device_version_check(cl_device_id device,
- string *error = NULL);
- static string get_hardware_id(const string& platform_name,
- cl_device_id device_id);
- static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
- bool force_all = false);
-
- /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
-
- /* Platform information. */
- static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
- static cl_uint get_num_platforms();
-
- static bool get_platforms(vector<cl_platform_id> *platform_ids,
- cl_int *error = NULL);
- static vector<cl_platform_id> get_platforms();
-
- static bool get_platform_name(cl_platform_id platform_id,
- string *platform_name);
- static string get_platform_name(cl_platform_id platform_id);
-
- static bool get_num_platform_devices(cl_platform_id platform_id,
- cl_device_type device_type,
- cl_uint *num_devices,
- cl_int *error = NULL);
- static cl_uint get_num_platform_devices(cl_platform_id platform_id,
- cl_device_type device_type);
-
- static bool get_platform_devices(cl_platform_id platform_id,
- cl_device_type device_type,
- vector<cl_device_id> *device_ids,
- cl_int* error = NULL);
- static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
- cl_device_type device_type);
-
- /* Device information. */
- static bool get_device_name(cl_device_id device_id,
- string *device_name,
- cl_int* error = NULL);
-
- static string get_device_name(cl_device_id device_id);
-
- static bool get_device_extensions(cl_device_id device_id,
- string *device_extensions,
- cl_int* error = NULL);
-
- static string get_device_extensions(cl_device_id device_id);
-
- static bool get_device_type(cl_device_id device_id,
- cl_device_type *device_type,
- cl_int* error = NULL);
- static cl_device_type get_device_type(cl_device_id device_id);
-
- static bool get_driver_version(cl_device_id device_id,
- int *major,
- int *minor,
- cl_int* error = NULL);
-
- static int mem_sub_ptr_alignment(cl_device_id device_id);
-
- /* Get somewhat more readable device name.
- * Main difference is AMD OpenCL here which only gives code name
- * for the regular device name. This will give more sane device
- * name using some extensions.
- */
- static string get_readable_device_name(cl_device_id device_id);
+class OpenCLInfo {
+ public:
+ static cl_device_type device_type();
+ static bool use_debug();
+ static bool device_supported(const string &platform_name, const cl_device_id device_id);
+ static bool platform_version_check(cl_platform_id platform, string *error = NULL);
+ static bool device_version_check(cl_device_id device, string *error = NULL);
+ static string get_hardware_id(const string &platform_name, cl_device_id device_id);
+ static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
+ bool force_all = false);
+
+ /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
+
+ /* Platform information. */
+ static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
+ static cl_uint get_num_platforms();
+
+ static bool get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error = NULL);
+ static vector<cl_platform_id> get_platforms();
+
+ static bool get_platform_name(cl_platform_id platform_id, string *platform_name);
+ static string get_platform_name(cl_platform_id platform_id);
+
+ static bool get_num_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ cl_uint *num_devices,
+ cl_int *error = NULL);
+ static cl_uint get_num_platform_devices(cl_platform_id platform_id, cl_device_type device_type);
+
+ static bool get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ vector<cl_device_id> *device_ids,
+ cl_int *error = NULL);
+ static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type);
+
+ /* Device information. */
+ static bool get_device_name(cl_device_id device_id, string *device_name, cl_int *error = NULL);
+
+ static string get_device_name(cl_device_id device_id);
+
+ static bool get_device_extensions(cl_device_id device_id,
+ string *device_extensions,
+ cl_int *error = NULL);
+
+ static string get_device_extensions(cl_device_id device_id);
+
+ static bool get_device_type(cl_device_id device_id,
+ cl_device_type *device_type,
+ cl_int *error = NULL);
+ static cl_device_type get_device_type(cl_device_id device_id);
+
+ static bool get_driver_version(cl_device_id device_id,
+ int *major,
+ int *minor,
+ cl_int *error = NULL);
+
+ static int mem_sub_ptr_alignment(cl_device_id device_id);
+
+ /* Get somewhat more readable device name.
+ * Main difference is AMD OpenCL here which only gives code name
+ * for the regular device name. This will give more sane device
+ * name using some extensions.
+ */
+ static string get_readable_device_name(cl_device_id device_id);
};
/* Thread safe cache for contexts and programs.
*/
-class OpenCLCache
-{
- struct Slot
- {
- struct ProgramEntry
- {
- ProgramEntry();
- ProgramEntry(const ProgramEntry& rhs);
- ~ProgramEntry();
- cl_program program;
- thread_mutex *mutex;
- };
-
- Slot();
- Slot(const Slot& rhs);
- ~Slot();
-
- thread_mutex *context_mutex;
- cl_context context;
- typedef map<ustring, ProgramEntry> EntryMap;
- EntryMap programs;
-
- };
-
- /* key is combination of platform ID and device ID */
- typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
-
- /* map of Slot objects */
- typedef map<PlatformDevicePair, Slot> CacheMap;
- CacheMap cache;
-
- /* MD5 hash of the kernel source. */
- string kernel_md5;
-
- thread_mutex cache_lock;
- thread_mutex kernel_md5_lock;
-
- /* lazy instantiate */
- static OpenCLCache& global_instance();
-
-public:
-
- enum ProgramName {
- OCL_DEV_BASE_PROGRAM,
- OCL_DEV_MEGAKERNEL_PROGRAM,
- };
-
- /* Lookup context in the cache. If this returns NULL, slot_locker
- * will be holding a lock for the cache. slot_locker should refer to a
- * default constructed thread_scoped_lock. */
- static cl_context get_context(cl_platform_id platform,
- cl_device_id device,
- thread_scoped_lock& slot_locker);
- /* Same as above. */
- static cl_program get_program(cl_platform_id platform,
- cl_device_id device,
- ustring key,
- thread_scoped_lock& slot_locker);
-
- /* Store context in the cache. You MUST have tried to get the item before storing to it. */
- static void store_context(cl_platform_id platform,
- cl_device_id device,
- cl_context context,
- thread_scoped_lock& slot_locker);
- /* Same as above. */
- static void store_program(cl_platform_id platform,
- cl_device_id device,
- cl_program program,
- ustring key,
- thread_scoped_lock& slot_locker);
-
- static string get_kernel_md5();
+class OpenCLCache {
+ struct Slot {
+ struct ProgramEntry {
+ ProgramEntry();
+ ProgramEntry(const ProgramEntry &rhs);
+ ~ProgramEntry();
+ cl_program program;
+ thread_mutex *mutex;
+ };
+
+ Slot();
+ Slot(const Slot &rhs);
+ ~Slot();
+
+ thread_mutex *context_mutex;
+ cl_context context;
+ typedef map<ustring, ProgramEntry> EntryMap;
+ EntryMap programs;
+ };
+
+ /* key is combination of platform ID and device ID */
+ typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
+
+ /* map of Slot objects */
+ typedef map<PlatformDevicePair, Slot> CacheMap;
+ CacheMap cache;
+
+ /* MD5 hash of the kernel source. */
+ string kernel_md5;
+
+ thread_mutex cache_lock;
+ thread_mutex kernel_md5_lock;
+
+ /* lazy instantiate */
+ static OpenCLCache &global_instance();
+
+ public:
+ enum ProgramName {
+ OCL_DEV_BASE_PROGRAM,
+ OCL_DEV_MEGAKERNEL_PROGRAM,
+ };
+
+ /* Lookup context in the cache. If this returns NULL, slot_locker
+ * will be holding a lock for the cache. slot_locker should refer to a
+ * default constructed thread_scoped_lock. */
+ static cl_context get_context(cl_platform_id platform,
+ cl_device_id device,
+ thread_scoped_lock &slot_locker);
+ /* Same as above. */
+ static cl_program get_program(cl_platform_id platform,
+ cl_device_id device,
+ ustring key,
+ thread_scoped_lock &slot_locker);
+
+ /* Store context in the cache. You MUST have tried to get the item before storing to it. */
+ static void store_context(cl_platform_id platform,
+ cl_device_id device,
+ cl_context context,
+ thread_scoped_lock &slot_locker);
+ /* Same as above. */
+ static void store_program(cl_platform_id platform,
+ cl_device_id device,
+ cl_program program,
+ ustring key,
+ thread_scoped_lock &slot_locker);
+
+ static string get_kernel_md5();
};
-#define opencl_device_assert(device, stmt) \
- { \
- cl_int err = stmt; \
- \
- if(err != CL_SUCCESS) { \
- string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
- if((device)->error_message() == "") \
- (device)->set_error(message); \
- fprintf(stderr, "%s\n", message.c_str()); \
- } \
- } (void) 0
-
-#define opencl_assert(stmt) \
- { \
- cl_int err = stmt; \
- \
- if(err != CL_SUCCESS) { \
- string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
- if(error_msg == "") \
- error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- } \
- } (void) 0
-
-class OpenCLDevice : public Device
-{
-public:
- DedicatedTaskPool task_pool;
-
- /* Task pool for required kernels (base, AO kernels during foreground rendering) */
- TaskPool load_required_kernel_task_pool;
- /* Task pool for optional kernels (feature kernels during foreground rendering) */
- TaskPool load_kernel_task_pool;
- cl_context cxContext;
- cl_command_queue cqCommandQueue;
- cl_platform_id cpPlatform;
- cl_device_id cdDevice;
- cl_int ciErr;
- int device_num;
- bool use_preview_kernels;
-
- class OpenCLProgram {
- public:
- OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
- OpenCLProgram(OpenCLDevice *device,
- const string& program_name,
- const string& kernel_name,
- const string& kernel_build_options,
- bool use_stdout = true);
- ~OpenCLProgram();
-
- void add_kernel(ustring name);
-
- /* Try to load the program from device cache or disk */
- bool load();
- /* Compile the kernel (first separate, failback to local) */
- void compile();
- /* Create the OpenCL kernels after loading or compiling */
- void create_kernels();
-
- bool is_loaded() const { return loaded; }
- const string& get_log() const { return log; }
- void report_error();
-
- /* Wait until this kernel is available to be used
- * It will return true when the kernel is available.
- * It will return false when the kernel is not available
- * or could not be loaded. */
- bool wait_for_availability();
-
- cl_kernel operator()();
- cl_kernel operator()(ustring name);
-
- void release();
-
- private:
- bool build_kernel(const string *debug_src);
- /* Build the program by calling the own process.
- * This is required for multithreaded OpenCL compilation, since most Frameworks serialize
- * build calls internally if they come from the same process.
- * If that is not supported, this function just returns false.
- */
- bool compile_separate(const string& clbin);
- /* Build the program by calling OpenCL directly. */
- bool compile_kernel(const string *debug_src);
- /* Loading and saving the program from/to disk. */
- bool load_binary(const string& clbin, const string *debug_src = NULL);
- bool save_binary(const string& clbin);
-
- void add_log(const string& msg, bool is_debug);
- void add_error(const string& msg);
-
- bool loaded;
- bool needs_compiling;
-
- cl_program program;
- OpenCLDevice *device;
-
- /* Used for the OpenCLCache key. */
- string program_name;
-
- string kernel_file, kernel_build_options, device_md5;
-
- bool use_stdout;
- string log, error_msg;
- string compile_output;
-
- map<ustring, cl_kernel> kernels;
- };
-
- /* Container for all types of split programs. */
- class OpenCLSplitPrograms {
- public:
- OpenCLDevice *device;
- OpenCLProgram program_split;
- OpenCLProgram program_lamp_emission;
- OpenCLProgram program_do_volume;
- OpenCLProgram program_indirect_background;
- OpenCLProgram program_shader_eval;
- OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
- OpenCLProgram program_subsurface_scatter;
- OpenCLProgram program_direct_lighting;
- OpenCLProgram program_shadow_blocked_ao;
- OpenCLProgram program_shadow_blocked_dl;
-
- OpenCLSplitPrograms(OpenCLDevice *device);
- ~OpenCLSplitPrograms();
-
- /* Load the kernels and put the created kernels in the given `programs`
- * paramter. */
- void load_kernels(vector<OpenCLProgram*> &programs,
- const DeviceRequestedFeatures& requested_features,
- bool is_preview=false);
- };
-
- DeviceSplitKernel *split_kernel;
-
- OpenCLProgram base_program;
- OpenCLProgram bake_program;
- OpenCLProgram displace_program;
- OpenCLProgram background_program;
- OpenCLProgram denoising_program;
-
- OpenCLSplitPrograms kernel_programs;
- OpenCLSplitPrograms preview_programs;
-
- typedef map<string, device_vector<uchar>*> ConstMemMap;
- typedef map<string, device_ptr> MemMap;
-
- ConstMemMap const_mem_map;
- MemMap mem_map;
- device_ptr null_mem;
-
- bool device_initialized;
- string platform_name;
- string device_name;
-
- bool opencl_error(cl_int err);
- void opencl_error(const string& message);
- void opencl_assert_err(cl_int err, const char* where);
-
- OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
- ~OpenCLDevice();
-
- static void CL_CALLBACK context_notify_callback(const char *err_info,
- const void * /*private_info*/, size_t /*cb*/, void *user_data);
-
- bool opencl_version_check();
- OpenCLSplitPrograms* get_split_programs();
-
- string device_md5_hash(string kernel_custom_build_options = "");
- bool load_kernels(const DeviceRequestedFeatures& requested_features);
- void load_required_kernels(const DeviceRequestedFeatures& requested_features);
- void load_preview_kernels();
-
- bool wait_for_availability(const DeviceRequestedFeatures& requested_features);
- DeviceKernelStatus get_active_kernel_switch_state();
-
- /* Get the name of the opencl program for the given kernel */
- const string get_opencl_program_name(const string& kernel_name);
- /* Get the program file name to compile (*.cl) for the given kernel */
- const string get_opencl_program_filename(const string& kernel_name);
- string get_build_options(const DeviceRequestedFeatures& requested_features,
- const string& opencl_program_name,
- bool preview_kernel=false);
- /* Enable the default features to reduce recompilation events */
- void enable_default_features(DeviceRequestedFeatures& features);
-
- void mem_alloc(device_memory& mem);
- void mem_copy_to(device_memory& mem);
- void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
- void mem_zero(device_memory& mem);
- void mem_free(device_memory& mem);
-
- int mem_sub_ptr_alignment();
-
- void const_copy_to(const char *name, void *host, size_t size);
- void tex_alloc(device_memory& mem);
- void tex_free(device_memory& mem);
-
- size_t global_size_round_up(int group_size, int global_size);
- void enqueue_kernel(cl_kernel kernel, size_t w, size_t h,
- bool x_workgroups = false,
- size_t max_workgroup_size = -1);
- void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
- void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
-
- void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
- void shader(DeviceTask& task);
-
- void denoise(RenderTile& tile, DenoisingTask& denoising);
-
- class OpenCLDeviceTask : public DeviceTask {
- public:
- OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task)
- : DeviceTask(task)
- {
- run = function_bind(&OpenCLDevice::thread_run,
- device,
- this);
- }
- };
-
- int get_split_task_count(DeviceTask& /*task*/)
- {
- return 1;
- }
-
- void task_add(DeviceTask& task)
- {
- task_pool.push(new OpenCLDeviceTask(this, task));
- }
-
- void task_wait()
- {
- task_pool.wait();
- }
-
- void task_cancel()
- {
- task_pool.cancel();
- }
-
- void thread_run(DeviceTask *task);
-
- virtual BVHLayoutMask get_bvh_layout_mask() const {
- return BVH_LAYOUT_BVH2;
- }
-
- virtual bool show_samples() const {
- return true;
- }
-
-
-protected:
- string kernel_build_options(const string *debug_src = NULL);
-
- void mem_zero_kernel(device_ptr ptr, size_t size);
-
- bool denoising_non_local_means(device_ptr image_ptr,
- device_ptr guide_ptr,
- device_ptr variance_ptr,
- device_ptr out_ptr,
- DenoisingTask *task);
- bool denoising_construct_transform(DenoisingTask *task);
- bool denoising_accumulate(device_ptr color_ptr,
- device_ptr color_variance_ptr,
- device_ptr scale_ptr,
- int frame,
- DenoisingTask *task);
- bool denoising_solve(device_ptr output_ptr,
- DenoisingTask *task);
- bool denoising_combine_halves(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- int r, int4 rect,
- DenoisingTask *task);
- bool denoising_divide_shadow(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr sample_variance_ptr,
- device_ptr sv_variance_ptr,
- device_ptr buffer_variance_ptr,
- DenoisingTask *task);
- bool denoising_get_feature(int mean_offset,
- int variance_offset,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- float scale,
- DenoisingTask *task);
- bool denoising_write_feature(int to_offset,
- device_ptr from_ptr,
- device_ptr buffer_ptr,
- DenoisingTask *task);
- bool denoising_detect_outliers(device_ptr image_ptr,
- device_ptr variance_ptr,
- device_ptr depth_ptr,
- device_ptr output_ptr,
- DenoisingTask *task);
-
- device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
- void mem_free_sub_ptr(device_ptr ptr);
-
- class ArgumentWrapper {
- public:
- ArgumentWrapper() : size(0), pointer(NULL)
- {
- }
-
- ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
- pointer((void*)(&argument.device_pointer))
- {
- }
-
- template<typename T>
- ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
- pointer((void*)(&argument.device_pointer))
- {
- }
-
- template<typename T>
- ArgumentWrapper(device_only_memory<T>& argument) : size(sizeof(void*)),
- pointer((void*)(&argument.device_pointer))
- {
- }
- template<typename T>
- ArgumentWrapper(T& argument) : size(sizeof(argument)),
- pointer(&argument)
- {
- }
-
- ArgumentWrapper(int argument) : size(sizeof(int)),
- int_value(argument),
- pointer(&int_value)
- {
- }
-
- ArgumentWrapper(float argument) : size(sizeof(float)),
- float_value(argument),
- pointer(&float_value)
- {
- }
-
- size_t size;
- int int_value;
- float float_value;
- void *pointer;
- };
-
- /* TODO(sergey): In the future we can use variadic templates, once
- * C++0x is allowed. Should allow to clean this up a bit.
- */
- int kernel_set_args(cl_kernel kernel,
- int start_argument_index,
- const ArgumentWrapper& arg1 = ArgumentWrapper(),
- const ArgumentWrapper& arg2 = ArgumentWrapper(),
- const ArgumentWrapper& arg3 = ArgumentWrapper(),
- const ArgumentWrapper& arg4 = ArgumentWrapper(),
- const ArgumentWrapper& arg5 = ArgumentWrapper(),
- const ArgumentWrapper& arg6 = ArgumentWrapper(),
- const ArgumentWrapper& arg7 = ArgumentWrapper(),
- const ArgumentWrapper& arg8 = ArgumentWrapper(),
- const ArgumentWrapper& arg9 = ArgumentWrapper(),
- const ArgumentWrapper& arg10 = ArgumentWrapper(),
- const ArgumentWrapper& arg11 = ArgumentWrapper(),
- const ArgumentWrapper& arg12 = ArgumentWrapper(),
- const ArgumentWrapper& arg13 = ArgumentWrapper(),
- const ArgumentWrapper& arg14 = ArgumentWrapper(),
- const ArgumentWrapper& arg15 = ArgumentWrapper(),
- const ArgumentWrapper& arg16 = ArgumentWrapper(),
- const ArgumentWrapper& arg17 = ArgumentWrapper(),
- const ArgumentWrapper& arg18 = ArgumentWrapper(),
- const ArgumentWrapper& arg19 = ArgumentWrapper(),
- const ArgumentWrapper& arg20 = ArgumentWrapper(),
- const ArgumentWrapper& arg21 = ArgumentWrapper(),
- const ArgumentWrapper& arg22 = ArgumentWrapper(),
- const ArgumentWrapper& arg23 = ArgumentWrapper(),
- const ArgumentWrapper& arg24 = ArgumentWrapper(),
- const ArgumentWrapper& arg25 = ArgumentWrapper(),
- const ArgumentWrapper& arg26 = ArgumentWrapper(),
- const ArgumentWrapper& arg27 = ArgumentWrapper(),
- const ArgumentWrapper& arg28 = ArgumentWrapper(),
- const ArgumentWrapper& arg29 = ArgumentWrapper(),
- const ArgumentWrapper& arg30 = ArgumentWrapper(),
- const ArgumentWrapper& arg31 = ArgumentWrapper(),
- const ArgumentWrapper& arg32 = ArgumentWrapper(),
- const ArgumentWrapper& arg33 = ArgumentWrapper());
-
- void release_kernel_safe(cl_kernel kernel);
- void release_mem_object_safe(cl_mem mem);
- void release_program_safe(cl_program program);
-
- /* ** Those guys are for workign around some compiler-specific bugs ** */
-
- cl_program load_cached_kernel(
- ustring key,
- thread_scoped_lock& cache_locker);
-
- void store_cached_kernel(
- cl_program program,
- ustring key,
- thread_scoped_lock& cache_locker);
-
-private:
- MemoryManager memory_manager;
- friend class MemoryManager;
-
- static_assert_align(TextureInfo, 16);
- device_vector<TextureInfo> texture_info;
-
- typedef map<string, device_memory*> TexturesMap;
- TexturesMap textures;
-
- bool textures_need_update;
-
-protected:
- void flush_texture_buffers();
-
- friend class OpenCLSplitKernel;
- friend class OpenCLSplitKernelFunction;
+# define opencl_device_assert(device, stmt) \
+ { \
+ cl_int err = stmt; \
+\
+ if (err != CL_SUCCESS) { \
+ string message = string_printf( \
+ "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
+ if ((device)->error_message() == "") \
+ (device)->set_error(message); \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } \
+ (void)0
+
+# define opencl_assert(stmt) \
+ { \
+ cl_int err = stmt; \
+\
+ if (err != CL_SUCCESS) { \
+ string message = string_printf( \
+ "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
+ if (error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } \
+ (void)0
+
+class OpenCLDevice : public Device {
+ public:
+ DedicatedTaskPool task_pool;
+
+ /* Task pool for required kernels (base, AO kernels during foreground rendering) */
+ TaskPool load_required_kernel_task_pool;
+ /* Task pool for optional kernels (feature kernels during foreground rendering) */
+ TaskPool load_kernel_task_pool;
+ cl_context cxContext;
+ cl_command_queue cqCommandQueue;
+ cl_platform_id cpPlatform;
+ cl_device_id cdDevice;
+ cl_int ciErr;
+ int device_num;
+ bool use_preview_kernels;
+
+ class OpenCLProgram {
+ public:
+ OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL)
+ {
+ }
+ OpenCLProgram(OpenCLDevice *device,
+ const string &program_name,
+ const string &kernel_name,
+ const string &kernel_build_options,
+ bool use_stdout = true);
+ ~OpenCLProgram();
+
+ void add_kernel(ustring name);
+
+ /* Try to load the program from device cache or disk */
+ bool load();
+ /* Compile the kernel (first separate, failback to local) */
+ void compile();
+ /* Create the OpenCL kernels after loading or compiling */
+ void create_kernels();
+
+ bool is_loaded() const
+ {
+ return loaded;
+ }
+ const string &get_log() const
+ {
+ return log;
+ }
+ void report_error();
+
+ /* Wait until this kernel is available to be used
+ * It will return true when the kernel is available.
+ * It will return false when the kernel is not available
+ * or could not be loaded. */
+ bool wait_for_availability();
+
+ cl_kernel operator()();
+ cl_kernel operator()(ustring name);
+
+ void release();
+
+ private:
+ bool build_kernel(const string *debug_src);
+ /* Build the program by calling the own process.
+ * This is required for multithreaded OpenCL compilation, since most Frameworks serialize
+ * build calls internally if they come from the same process.
+ * If that is not supported, this function just returns false.
+ */
+ bool compile_separate(const string &clbin);
+ /* Build the program by calling OpenCL directly. */
+ bool compile_kernel(const string *debug_src);
+ /* Loading and saving the program from/to disk. */
+ bool load_binary(const string &clbin, const string *debug_src = NULL);
+ bool save_binary(const string &clbin);
+
+ void add_log(const string &msg, bool is_debug);
+ void add_error(const string &msg);
+
+ bool loaded;
+ bool needs_compiling;
+
+ cl_program program;
+ OpenCLDevice *device;
+
+ /* Used for the OpenCLCache key. */
+ string program_name;
+
+ string kernel_file, kernel_build_options, device_md5;
+
+ bool use_stdout;
+ string log, error_msg;
+ string compile_output;
+
+ map<ustring, cl_kernel> kernels;
+ };
+
+ /* Container for all types of split programs. */
+ class OpenCLSplitPrograms {
+ public:
+ OpenCLDevice *device;
+ OpenCLProgram program_split;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+
+ OpenCLSplitPrograms(OpenCLDevice *device);
+ ~OpenCLSplitPrograms();
+
+ /* Load the kernels and put the created kernels in the given `programs`
+ * paramter. */
+ void load_kernels(vector<OpenCLProgram *> &programs,
+ const DeviceRequestedFeatures &requested_features,
+ bool is_preview = false);
+ };
+
+ DeviceSplitKernel *split_kernel;
+
+ OpenCLProgram base_program;
+ OpenCLProgram bake_program;
+ OpenCLProgram displace_program;
+ OpenCLProgram background_program;
+ OpenCLProgram denoising_program;
+
+ OpenCLSplitPrograms kernel_programs;
+ OpenCLSplitPrograms preview_programs;
+
+ typedef map<string, device_vector<uchar> *> ConstMemMap;
+ typedef map<string, device_ptr> MemMap;
+
+ ConstMemMap const_mem_map;
+ MemMap mem_map;
+ device_ptr null_mem;
+
+ bool device_initialized;
+ string platform_name;
+ string device_name;
+
+ bool opencl_error(cl_int err);
+ void opencl_error(const string &message);
+ void opencl_assert_err(cl_int err, const char *where);
+
+ OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+ ~OpenCLDevice();
+
+ static void CL_CALLBACK context_notify_callback(const char *err_info,
+ const void * /*private_info*/,
+ size_t /*cb*/,
+ void *user_data);
+
+ bool opencl_version_check();
+ OpenCLSplitPrograms *get_split_programs();
+
+ string device_md5_hash(string kernel_custom_build_options = "");
+ bool load_kernels(const DeviceRequestedFeatures &requested_features);
+ void load_required_kernels(const DeviceRequestedFeatures &requested_features);
+ void load_preview_kernels();
+
+ bool wait_for_availability(const DeviceRequestedFeatures &requested_features);
+ DeviceKernelStatus get_active_kernel_switch_state();
+
+ /* Get the name of the opencl program for the given kernel */
+ const string get_opencl_program_name(const string &kernel_name);
+ /* Get the program file name to compile (*.cl) for the given kernel */
+ const string get_opencl_program_filename(const string &kernel_name);
+ string get_build_options(const DeviceRequestedFeatures &requested_features,
+ const string &opencl_program_name,
+ bool preview_kernel = false);
+ /* Enable the default features to reduce recompilation events */
+ void enable_default_features(DeviceRequestedFeatures &features);
+
+ void mem_alloc(device_memory &mem);
+ void mem_copy_to(device_memory &mem);
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
+ void mem_zero(device_memory &mem);
+ void mem_free(device_memory &mem);
+
+ int mem_sub_ptr_alignment();
+
+ void const_copy_to(const char *name, void *host, size_t size);
+ void tex_alloc(device_memory &mem);
+ void tex_free(device_memory &mem);
+
+ size_t global_size_round_up(int group_size, int global_size);
+ void enqueue_kernel(cl_kernel kernel,
+ size_t w,
+ size_t h,
+ bool x_workgroups = false,
+ size_t max_workgroup_size = -1);
+ void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
+ void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
+
+ void film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half);
+ void shader(DeviceTask &task);
+
+ void denoise(RenderTile &tile, DenoisingTask &denoising);
+
+ class OpenCLDeviceTask : public DeviceTask {
+ public:
+ OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&OpenCLDevice::thread_run, device, this);
+ }
+ };
+
+ int get_split_task_count(DeviceTask & /*task*/)
+ {
+ return 1;
+ }
+
+ void task_add(DeviceTask &task)
+ {
+ task_pool.push(new OpenCLDeviceTask(this, task));
+ }
+
+ void task_wait()
+ {
+ task_pool.wait();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ void thread_run(DeviceTask *task);
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
+ return BVH_LAYOUT_BVH2;
+ }
+
+ virtual bool show_samples() const
+ {
+ return true;
+ }
+
+ protected:
+ string kernel_build_options(const string *debug_src = NULL);
+
+ void mem_zero_kernel(device_ptr ptr, size_t size);
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task);
+ bool denoising_construct_transform(DenoisingTask *task);
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task);
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task);
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task);
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task);
+ bool denoising_write_feature(int to_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task);
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task);
+
+ device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int size);
+ void mem_free_sub_ptr(device_ptr ptr);
+
+ class ArgumentWrapper {
+ public:
+ ArgumentWrapper() : size(0), pointer(NULL)
+ {
+ }
+
+ ArgumentWrapper(device_memory &argument)
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
+ {
+ }
+
+ template<typename T>
+ ArgumentWrapper(device_vector<T> &argument)
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
+ {
+ }
+
+ template<typename T>
+ ArgumentWrapper(device_only_memory<T> &argument)
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
+ {
+ }
+ template<typename T> ArgumentWrapper(T &argument) : size(sizeof(argument)), pointer(&argument)
+ {
+ }
+
+ ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value)
+ {
+ }
+
+ ArgumentWrapper(float argument)
+ : size(sizeof(float)), float_value(argument), pointer(&float_value)
+ {
+ }
+
+ size_t size;
+ int int_value;
+ float float_value;
+ void *pointer;
+ };
+
+ /* TODO(sergey): In the future we can use variadic templates, once
+ * C++0x is allowed. Should allow to clean this up a bit.
+ */
+ int kernel_set_args(cl_kernel kernel,
+ int start_argument_index,
+ const ArgumentWrapper &arg1 = ArgumentWrapper(),
+ const ArgumentWrapper &arg2 = ArgumentWrapper(),
+ const ArgumentWrapper &arg3 = ArgumentWrapper(),
+ const ArgumentWrapper &arg4 = ArgumentWrapper(),
+ const ArgumentWrapper &arg5 = ArgumentWrapper(),
+ const ArgumentWrapper &arg6 = ArgumentWrapper(),
+ const ArgumentWrapper &arg7 = ArgumentWrapper(),
+ const ArgumentWrapper &arg8 = ArgumentWrapper(),
+ const ArgumentWrapper &arg9 = ArgumentWrapper(),
+ const ArgumentWrapper &arg10 = ArgumentWrapper(),
+ const ArgumentWrapper &arg11 = ArgumentWrapper(),
+ const ArgumentWrapper &arg12 = ArgumentWrapper(),
+ const ArgumentWrapper &arg13 = ArgumentWrapper(),
+ const ArgumentWrapper &arg14 = ArgumentWrapper(),
+ const ArgumentWrapper &arg15 = ArgumentWrapper(),
+ const ArgumentWrapper &arg16 = ArgumentWrapper(),
+ const ArgumentWrapper &arg17 = ArgumentWrapper(),
+ const ArgumentWrapper &arg18 = ArgumentWrapper(),
+ const ArgumentWrapper &arg19 = ArgumentWrapper(),
+ const ArgumentWrapper &arg20 = ArgumentWrapper(),
+ const ArgumentWrapper &arg21 = ArgumentWrapper(),
+ const ArgumentWrapper &arg22 = ArgumentWrapper(),
+ const ArgumentWrapper &arg23 = ArgumentWrapper(),
+ const ArgumentWrapper &arg24 = ArgumentWrapper(),
+ const ArgumentWrapper &arg25 = ArgumentWrapper(),
+ const ArgumentWrapper &arg26 = ArgumentWrapper(),
+ const ArgumentWrapper &arg27 = ArgumentWrapper(),
+ const ArgumentWrapper &arg28 = ArgumentWrapper(),
+ const ArgumentWrapper &arg29 = ArgumentWrapper(),
+ const ArgumentWrapper &arg30 = ArgumentWrapper(),
+ const ArgumentWrapper &arg31 = ArgumentWrapper(),
+ const ArgumentWrapper &arg32 = ArgumentWrapper(),
+ const ArgumentWrapper &arg33 = ArgumentWrapper());
+
+ void release_kernel_safe(cl_kernel kernel);
+ void release_mem_object_safe(cl_mem mem);
+ void release_program_safe(cl_program program);
+
+ /* ** Those guys are for workign around some compiler-specific bugs ** */
+
+ cl_program load_cached_kernel(ustring key, thread_scoped_lock &cache_locker);
+
+ void store_cached_kernel(cl_program program, ustring key, thread_scoped_lock &cache_locker);
+
+ private:
+ MemoryManager memory_manager;
+ friend class MemoryManager;
+
+ static_assert_align(TextureInfo, 16);
+ device_vector<TextureInfo> texture_info;
+
+ typedef map<string, device_memory *> TexturesMap;
+ TexturesMap textures;
+
+ bool textures_need_update;
+
+ protected:
+ void flush_texture_buffers();
+
+ friend class OpenCLSplitKernel;
+ friend class OpenCLSplitKernelFunction;
};
-Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, Profiler &profiler, bool background);
+Device *opencl_create_split_device(DeviceInfo &info,
+ Stats &stats,
+ Profiler &profiler,
+ bool background);
CCL_NAMESPACE_END