diff options
Diffstat (limited to 'intern/cycles/device/device_opencl.cpp')
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 524 |
1 files changed, 83 insertions, 441 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index c3392d27b2c..1b4e5421b5a 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -27,6 +27,7 @@ #include "buffers.h" +#include "util_debug.h" #include "util_foreach.h" #include "util_logging.h" #include "util_map.h" @@ -84,29 +85,28 @@ namespace { cl_device_type opencl_device_type() { - char *device = getenv("CYCLES_OPENCL_TEST"); - - if(device) { - if(strcmp(device, "NONE") == 0) + switch(DebugFlags().opencl.device_type) + { + case DebugFlags::OpenCL::DEVICE_NONE: return 0; - if(strcmp(device, "ALL") == 0) + case DebugFlags::OpenCL::DEVICE_ALL: return CL_DEVICE_TYPE_ALL; - else if(strcmp(device, "DEFAULT") == 0) + case DebugFlags::OpenCL::DEVICE_DEFAULT: return CL_DEVICE_TYPE_DEFAULT; - else if(strcmp(device, "CPU") == 0) + case DebugFlags::OpenCL::DEVICE_CPU: return CL_DEVICE_TYPE_CPU; - else if(strcmp(device, "GPU") == 0) + case DebugFlags::OpenCL::DEVICE_GPU: return CL_DEVICE_TYPE_GPU; - else if(strcmp(device, "ACCELERATOR") == 0) + case DebugFlags::OpenCL::DEVICE_ACCELERATOR: return CL_DEVICE_TYPE_ACCELERATOR; + default: + return CL_DEVICE_TYPE_ALL; } - - return CL_DEVICE_TYPE_ALL; } -bool opencl_kernel_use_debug() +inline bool opencl_kernel_use_debug() { - return (getenv("CYCLES_OPENCL_DEBUG") != NULL); + return DebugFlags().opencl.debug; } bool opencl_kernel_use_advanced_shading(const string& platform) @@ -129,9 +129,14 @@ bool opencl_kernel_use_advanced_shading(const string& platform) bool opencl_kernel_use_split(const string& platform_name, const cl_device_type device_type) { - if(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL) { + if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) { + VLOG(1) << "Forcing split kernel to use."; return true; } + if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) { + VLOG(1) << "Forcing mega kernel to use."; + return false; + } /* TODO(sergey): Replace string lookups with more enum-like API, * similar to device/vendor checks blender's gpu. */ @@ -224,8 +229,7 @@ bool opencl_device_version_check(cl_device_id device, void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices) { const bool force_all_platforms = - (getenv("CYCLES_OPENCL_TEST") != NULL) || - (getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST")) != NULL; + (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT); const cl_device_type device_type = opencl_device_type(); static bool first_time = true; #define FIRST_VLOG(severity) if(first_time) VLOG(severity) @@ -313,7 +317,7 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices) continue; } if(!opencl_device_version_check(device_id)) { - FIRST_VLOG(2) << "Ignoting device " << device_name + FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version."; continue; } @@ -327,8 +331,8 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices) &device_type, NULL) != CL_SUCCESS) { - FIRST_VLOG(2) << "Ignoting device " << device_name - << ", faield to fetch device type."; + FIRST_VLOG(2) << "Ignoring device " << device_name + << ", failed to fetch device type."; continue; } FIRST_VLOG(2) << "Adding new device " << device_name << "."; @@ -339,7 +343,7 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices) device_name)); } else { - FIRST_VLOG(2) << "Ignoting device " << device_name + FIRST_VLOG(2) << "Ignoring device " << device_name << ", not officially supported yet."; } } @@ -581,7 +585,7 @@ public: ProgramName program_name, thread_scoped_lock& slot_locker) { - switch (program_name) { + switch(program_name) { case OCL_DEV_BASE_PROGRAM: store_something<cl_program>(platform, device, @@ -990,7 +994,8 @@ public: if(path_exists(clbin) && load_binary(kernel_path, clbin, build_flags, - &cpProgram)) { + &cpProgram)) + { /* Kernel loaded from binary, nothing to do. */ VLOG(2) << "Loaded kernel from " << clbin << "."; } @@ -1110,7 +1115,7 @@ public: { /* this is blocking */ size_t size = mem.memory_size(); - if(size != 0){ + if(size != 0) { opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, @@ -1299,7 +1304,9 @@ public: cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_input = CL_MEM_PTR(task.shader_input); cl_mem d_output = CL_MEM_PTR(task.shader_output); + cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma); cl_int d_shader_eval_type = task.shader_eval_type; + cl_int d_shader_filter = task.shader_filter; cl_int d_shader_x = task.shader_x; cl_int d_shader_w = task.shader_w; cl_int d_offset = task.offset; @@ -1325,14 +1332,27 @@ public: d_input, d_output); + if(task.shader_eval_type < SHADER_EVAL_BAKE) { + start_arg_index += kernel_set_args(kernel, + start_arg_index, + d_output_luma); + } + #define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(kernel, &start_arg_index, #name); + set_kernel_arg_mem(kernel, &start_arg_index, #name); #include "kernel_textures.h" #undef KERNEL_TEX start_arg_index += kernel_set_args(kernel, start_arg_index, - d_shader_eval_type, + d_shader_eval_type); + if(task.shader_eval_type >= SHADER_EVAL_BAKE) { + start_arg_index += kernel_set_args(kernel, + start_arg_index, + d_shader_filter); + } + start_arg_index += kernel_set_args(kernel, + start_arg_index, d_shader_x, d_shader_w, d_offset, @@ -1380,7 +1400,7 @@ public: protected: string kernel_build_options(const string *debug_src = NULL) { - string build_options = " -cl-fast-relaxed-math "; + string build_options = "-cl-fast-relaxed-math "; if(platform_name == "NVIDIA CUDA") { build_options += "-D__KERNEL_OPENCL_NVIDIA__ " @@ -1430,7 +1450,15 @@ protected: template <typename T> ArgumentWrapper(T& argument) : size(sizeof(argument)), pointer(&argument) { } + ArgumentWrapper(int argument) : size(sizeof(int)), + int_value(argument), + pointer(&int_value) { } + ArgumentWrapper(float argument) : size(sizeof(float)), + float_value(argument), + pointer(&float_value) { } size_t size; + int int_value; + float float_value; void *pointer; }; @@ -1545,34 +1573,6 @@ protected: } } - string build_options_from_requested_features( - const DeviceRequestedFeatures& requested_features) - { - string build_options = ""; - if(requested_features.experimental) { - build_options += " -D__KERNEL_EXPERIMENTAL__"; - } - build_options += " -D__NODES_MAX_GROUP__=" + - string_printf("%d", requested_features.max_nodes_group); - build_options += " -D__NODES_FEATURES__=" + - string_printf("%d", requested_features.nodes_features); - build_options += string_printf(" -D__MAX_CLOSURE__=%d", - requested_features.max_closure); - if(!requested_features.use_hair) { - build_options += " -D__NO_HAIR__"; - } - if(!requested_features.use_object_motion) { - build_options += " -D__NO_OBJECT_MOTION__"; - } - if(!requested_features.use_camera_motion) { - build_options += " -D__NO_CAMERA_MOTION__"; - } - if(!requested_features.use_baking) { - build_options += " -D__NO_BAKING__"; - } - return build_options; - } - /* ** Those guys are for workign around some compiler-specific bugs ** */ virtual cl_program load_cached_kernel( @@ -1673,7 +1673,8 @@ public: clbin, custom_kernel_build_options, &path_trace_program, - debug_src)) { + debug_src)) + { /* Kernel loaded from binary, nothing to do. */ } else { @@ -1909,63 +1910,6 @@ public: * shadow_blocked kernel. */ - /* Global buffers of each member of ShaderData. */ - cl_mem P_sd; - cl_mem P_sd_DL_shadow; - cl_mem N_sd; - cl_mem N_sd_DL_shadow; - cl_mem Ng_sd; - cl_mem Ng_sd_DL_shadow; - cl_mem I_sd; - cl_mem I_sd_DL_shadow; - cl_mem shader_sd; - cl_mem shader_sd_DL_shadow; - cl_mem flag_sd; - cl_mem flag_sd_DL_shadow; - cl_mem prim_sd; - cl_mem prim_sd_DL_shadow; - cl_mem type_sd; - cl_mem type_sd_DL_shadow; - cl_mem u_sd; - cl_mem u_sd_DL_shadow; - cl_mem v_sd; - cl_mem v_sd_DL_shadow; - cl_mem object_sd; - cl_mem object_sd_DL_shadow; - cl_mem time_sd; - cl_mem time_sd_DL_shadow; - cl_mem ray_length_sd; - cl_mem ray_length_sd_DL_shadow; - cl_mem ray_depth_sd; - cl_mem ray_depth_sd_DL_shadow; - cl_mem transparent_depth_sd; - cl_mem transparent_depth_sd_DL_shadow; - - /* Ray differentials. */ - cl_mem dP_sd, dI_sd; - cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow; - cl_mem du_sd, dv_sd; - cl_mem du_sd_DL_shadow, dv_sd_DL_shadow; - - /* Dp/Du */ - cl_mem dPdu_sd, dPdv_sd; - cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow; - - /* Object motion. */ - cl_mem ob_tfm_sd, ob_itfm_sd; - cl_mem ob_tfm_sd_DL_shadow, ob_itfm_sd_DL_shadow; - - cl_mem closure_sd; - cl_mem closure_sd_DL_shadow; - cl_mem num_closure_sd; - cl_mem num_closure_sd_DL_shadow; - cl_mem randb_closure_sd; - cl_mem randb_closure_sd_DL_shadow; - cl_mem ray_P_sd; - cl_mem ray_P_sd_DL_shadow; - cl_mem ray_dP_sd; - cl_mem ray_dP_sd_DL_shadow; - /* Global memory required for shadow blocked and accum_radiance. */ cl_mem BSDFEval_coop; cl_mem ISLamp_coop; @@ -1973,8 +1917,7 @@ public: cl_mem AOAlpha_coop; cl_mem AOBSDF_coop; cl_mem AOLightRay_coop; - cl_mem Intersection_coop_AO; - cl_mem Intersection_coop_DL; + cl_mem Intersection_coop_shadow; #ifdef WITH_CYCLES_DEBUG /* DebugData memory */ @@ -2063,70 +2006,6 @@ public: sd = NULL; sd_DL_shadow = NULL; - P_sd = NULL; - P_sd_DL_shadow = NULL; - N_sd = NULL; - N_sd_DL_shadow = NULL; - Ng_sd = NULL; - Ng_sd_DL_shadow = NULL; - I_sd = NULL; - I_sd_DL_shadow = NULL; - shader_sd = NULL; - shader_sd_DL_shadow = NULL; - flag_sd = NULL; - flag_sd_DL_shadow = NULL; - prim_sd = NULL; - prim_sd_DL_shadow = NULL; - type_sd = NULL; - type_sd_DL_shadow = NULL; - u_sd = NULL; - u_sd_DL_shadow = NULL; - v_sd = NULL; - v_sd_DL_shadow = NULL; - object_sd = NULL; - object_sd_DL_shadow = NULL; - time_sd = NULL; - time_sd_DL_shadow = NULL; - ray_length_sd = NULL; - ray_length_sd_DL_shadow = NULL; - ray_depth_sd = NULL; - ray_depth_sd_DL_shadow = NULL; - transparent_depth_sd = NULL; - transparent_depth_sd_DL_shadow = NULL; - - /* Ray differentials. */ - dP_sd = NULL; - dI_sd = NULL; - dP_sd_DL_shadow = NULL; - dI_sd_DL_shadow = NULL; - du_sd = NULL; - dv_sd = NULL; - du_sd_DL_shadow = NULL; - dv_sd_DL_shadow = NULL; - - /* Dp/Du */ - dPdu_sd = NULL; - dPdv_sd = NULL; - dPdu_sd_DL_shadow = NULL; - dPdv_sd_DL_shadow = NULL; - - /* Object motion. */ - ob_tfm_sd = NULL; - ob_itfm_sd = NULL; - ob_tfm_sd_DL_shadow = NULL; - ob_itfm_sd_DL_shadow = NULL; - - closure_sd = NULL; - closure_sd_DL_shadow = NULL; - num_closure_sd = NULL; - num_closure_sd_DL_shadow = NULL; - randb_closure_sd = NULL; - randb_closure_sd_DL_shadow = NULL; - ray_P_sd = NULL; - ray_P_sd_DL_shadow = NULL; - ray_dP_sd = NULL; - ray_dP_sd_DL_shadow = NULL; - rng_coop = NULL; throughput_coop = NULL; L_transparent_coop = NULL; @@ -2142,8 +2021,7 @@ public: BSDFEval_coop = NULL; ISLamp_coop = NULL; LightRay_coop = NULL; - Intersection_coop_AO = NULL; - Intersection_coop_DL = NULL; + Intersection_coop_shadow = NULL; #ifdef WITH_CYCLES_DEBUG debugdata_coop = NULL; @@ -2204,7 +2082,8 @@ public: clbin, custom_kernel_build_options, program, - debug_src)) { + debug_src)) + { /* Kernel loaded from binary. */ } else { @@ -2243,17 +2122,10 @@ public: return ret_size; } - size_t get_shader_closure_size(int max_closure) + size_t get_shader_data_size(size_t max_closure) { - return (sizeof(ShaderClosure) * max_closure); - } - - size_t get_shader_data_size(size_t shader_closure_size) - { - /* ShaderData size without accounting for ShaderClosure array. */ - size_t shader_data_size = - sizeof(ShaderData) - (sizeof(ShaderClosure) * MAX_CLOSURE); - return (shader_data_size + shader_closure_size); + /* ShaderData size with variable size ShaderClosure array */ + return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure)); } /* Returns size of KernelGlobals structure associated with OpenCL. */ @@ -2268,25 +2140,13 @@ public: ccl_global type *name; #include "kernel_textures.h" #undef KERNEL_TEX + void *sd_input; + void *isect_shadow; } KernelGlobals; return sizeof(KernelGlobals); } - /* Returns size of Structure of arrays implementation of. */ - size_t get_shaderdata_soa_size() - { - size_t shader_soa_size = 0; - -#define SD_VAR(type, what) shader_soa_size += sizeof(void *); -#define SD_CLOSURE_VAR(type, what, max_closure) shader_soa_size += sizeof(void *); - #include "kernel_shaderdata_vars.h" -#undef SD_VAR -#undef SD_CLOSURE_VAR - - return shader_soa_size; - } - bool load_kernels(const DeviceRequestedFeatures& requested_features) { /* Get Shader, bake and film_convert kernels. @@ -2303,11 +2163,11 @@ public: string clbin; string clsrc, *debug_src = NULL; - string build_options = "-D__SPLIT_KERNEL__"; + string build_options = "-D__SPLIT_KERNEL__ "; #ifdef __WORK_STEALING__ - build_options += " -D__WORK_STEALING__"; + build_options += "-D__WORK_STEALING__ "; #endif - build_options += build_options_from_requested_features(requested_features); + build_options += requested_features.get_build_options(); /* Set compute device build option. */ cl_device_type device_type; @@ -2407,70 +2267,6 @@ public: release_kernel_safe(ckPathTraceKernel_sum_all_radiance); /* Release global memory */ - release_mem_object_safe(P_sd); - release_mem_object_safe(P_sd_DL_shadow); - release_mem_object_safe(N_sd); - release_mem_object_safe(N_sd_DL_shadow); - release_mem_object_safe(Ng_sd); - release_mem_object_safe(Ng_sd_DL_shadow); - release_mem_object_safe(I_sd); - release_mem_object_safe(I_sd_DL_shadow); - release_mem_object_safe(shader_sd); - release_mem_object_safe(shader_sd_DL_shadow); - release_mem_object_safe(flag_sd); - release_mem_object_safe(flag_sd_DL_shadow); - release_mem_object_safe(prim_sd); - release_mem_object_safe(prim_sd_DL_shadow); - release_mem_object_safe(type_sd); - release_mem_object_safe(type_sd_DL_shadow); - release_mem_object_safe(u_sd); - release_mem_object_safe(u_sd_DL_shadow); - release_mem_object_safe(v_sd); - release_mem_object_safe(v_sd_DL_shadow); - release_mem_object_safe(object_sd); - release_mem_object_safe(object_sd_DL_shadow); - release_mem_object_safe(time_sd); - release_mem_object_safe(time_sd_DL_shadow); - release_mem_object_safe(ray_length_sd); - release_mem_object_safe(ray_length_sd_DL_shadow); - release_mem_object_safe(ray_depth_sd); - release_mem_object_safe(ray_depth_sd_DL_shadow); - release_mem_object_safe(transparent_depth_sd); - release_mem_object_safe(transparent_depth_sd_DL_shadow); - - /* Ray differentials. */ - release_mem_object_safe(dP_sd); - release_mem_object_safe(dP_sd_DL_shadow); - release_mem_object_safe(dI_sd); - release_mem_object_safe(dI_sd_DL_shadow); - release_mem_object_safe(du_sd); - release_mem_object_safe(du_sd_DL_shadow); - release_mem_object_safe(dv_sd); - release_mem_object_safe(dv_sd_DL_shadow); - - /* Dp/Du */ - release_mem_object_safe(dPdu_sd); - release_mem_object_safe(dPdu_sd_DL_shadow); - release_mem_object_safe(dPdv_sd); - release_mem_object_safe(dPdv_sd_DL_shadow); - - /* Object motion. */ - release_mem_object_safe(ob_tfm_sd); - release_mem_object_safe(ob_itfm_sd); - - release_mem_object_safe(ob_tfm_sd_DL_shadow); - release_mem_object_safe(ob_itfm_sd_DL_shadow); - - release_mem_object_safe(closure_sd); - release_mem_object_safe(closure_sd_DL_shadow); - release_mem_object_safe(num_closure_sd); - release_mem_object_safe(num_closure_sd_DL_shadow); - release_mem_object_safe(randb_closure_sd); - release_mem_object_safe(randb_closure_sd_DL_shadow); - release_mem_object_safe(ray_P_sd); - release_mem_object_safe(ray_P_sd_DL_shadow); - release_mem_object_safe(ray_dP_sd); - release_mem_object_safe(ray_dP_sd_DL_shadow); release_mem_object_safe(rng_coop); release_mem_object_safe(throughput_coop); release_mem_object_safe(L_transparent_coop); @@ -2488,8 +2284,7 @@ public: release_mem_object_safe(BSDFEval_coop); release_mem_object_safe(ISLamp_coop); release_mem_object_safe(LightRay_coop); - release_mem_object_safe(Intersection_coop_AO); - release_mem_object_safe(Intersection_coop_DL); + release_mem_object_safe(Intersection_coop_shadow); #ifdef WITH_CYCLES_DEBUG release_mem_object_safe(debugdata_coop); #endif @@ -2586,7 +2381,7 @@ public: /* TODO(sergey): This will actually over-allocate if * particular kernel does not support multiclosure. */ - size_t ShaderClosure_size = get_shader_closure_size(current_max_closure); + size_t shaderdata_size = get_shader_data_size(current_max_closure); #ifdef __WORK_STEALING__ /* Calculate max groups */ @@ -2607,71 +2402,8 @@ public: kgbuffer = mem_alloc(get_KernelGlobals_size()); /* Create global buffers for ShaderData. */ - sd = mem_alloc(get_shaderdata_soa_size()); - sd_DL_shadow = mem_alloc(get_shaderdata_soa_size()); - P_sd = mem_alloc(num_global_elements * sizeof(float3)); - P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - N_sd = mem_alloc(num_global_elements * sizeof(float3)); - N_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - Ng_sd = mem_alloc(num_global_elements * sizeof(float3)); - Ng_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - I_sd = mem_alloc(num_global_elements * sizeof(float3)); - I_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - shader_sd = mem_alloc(num_global_elements * sizeof(int)); - shader_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - flag_sd = mem_alloc(num_global_elements * sizeof(int)); - flag_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - prim_sd = mem_alloc(num_global_elements * sizeof(int)); - prim_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - type_sd = mem_alloc(num_global_elements * sizeof(int)); - type_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - u_sd = mem_alloc(num_global_elements * sizeof(float)); - u_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - v_sd = mem_alloc(num_global_elements * sizeof(float)); - v_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - object_sd = mem_alloc(num_global_elements * sizeof(int)); - object_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - time_sd = mem_alloc(num_global_elements * sizeof(float)); - time_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - ray_length_sd = mem_alloc(num_global_elements * sizeof(float)); - ray_length_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - ray_depth_sd = mem_alloc(num_global_elements * sizeof(int)); - ray_depth_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - transparent_depth_sd = mem_alloc(num_global_elements * sizeof(int)); - transparent_depth_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - - /* Ray differentials. */ - dP_sd = mem_alloc(num_global_elements * sizeof(differential3)); - dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); - dI_sd = mem_alloc(num_global_elements * sizeof(differential3)); - dI_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); - du_sd = mem_alloc(num_global_elements * sizeof(differential)); - du_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential)); - dv_sd = mem_alloc(num_global_elements * sizeof(differential)); - dv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential)); - - /* Dp/Du */ - dPdu_sd = mem_alloc(num_global_elements * sizeof(float3)); - dPdu_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - dPdv_sd = mem_alloc(num_global_elements * sizeof(float3)); - dPdv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - - /* Object motion. */ - ob_tfm_sd = mem_alloc(num_global_elements * sizeof(Transform)); - ob_tfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform)); - ob_itfm_sd = mem_alloc(num_global_elements * sizeof(Transform)); - ob_itfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform)); - - closure_sd = mem_alloc(num_global_elements * ShaderClosure_size); - closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * ShaderClosure_size); - num_closure_sd = mem_alloc(num_global_elements * sizeof(int)); - num_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - randb_closure_sd = mem_alloc(num_global_elements * sizeof(float)); - randb_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - ray_P_sd = mem_alloc(num_global_elements * sizeof(float3)); - ray_P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - ray_dP_sd = mem_alloc(num_global_elements * sizeof(differential3)); - ray_dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); + sd = mem_alloc(num_global_elements * shaderdata_size); + sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size); /* Creation of global memory buffers which are shared among * the kernels. @@ -2689,8 +2421,7 @@ public: BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval)); ISLamp_coop = mem_alloc(num_global_elements * sizeof(int)); LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray)); - Intersection_coop_AO = mem_alloc(num_global_elements * sizeof(Intersection)); - Intersection_coop_DL = mem_alloc(num_global_elements * sizeof(Intersection)); + Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection)); #ifdef WITH_CYCLES_DEBUG debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData)); @@ -2708,89 +2439,12 @@ public: } cl_int dQueue_size = global_size[0] * global_size[1]; - cl_int total_num_rays = global_size[0] * global_size[1]; cl_uint start_arg_index = kernel_set_args(ckPathTraceKernel_data_init, 0, kgbuffer, - sd, sd_DL_shadow, - P_sd, - P_sd_DL_shadow, - N_sd, - N_sd_DL_shadow, - Ng_sd, - Ng_sd_DL_shadow, - I_sd, - I_sd_DL_shadow, - shader_sd, - shader_sd_DL_shadow, - flag_sd, - flag_sd_DL_shadow, - prim_sd, - prim_sd_DL_shadow, - type_sd, - type_sd_DL_shadow, - u_sd, - u_sd_DL_shadow, - v_sd, - v_sd_DL_shadow, - object_sd, - object_sd_DL_shadow, - time_sd, - time_sd_DL_shadow, - ray_length_sd, - ray_length_sd_DL_shadow, - ray_depth_sd, - ray_depth_sd_DL_shadow, - transparent_depth_sd, - transparent_depth_sd_DL_shadow); - - /* Ray differentials. */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - dP_sd, - dP_sd_DL_shadow, - dI_sd, - dI_sd_DL_shadow, - du_sd, - du_sd_DL_shadow, - dv_sd, - dv_sd_DL_shadow); - - /* Dp/Du */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - dPdu_sd, - dPdu_sd_DL_shadow, - dPdv_sd, - dPdv_sd_DL_shadow); - - /* Object motion. */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - ob_tfm_sd, - ob_tfm_sd_DL_shadow, - ob_itfm_sd, - ob_itfm_sd_DL_shadow); - - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - closure_sd, - closure_sd_DL_shadow, - num_closure_sd, - num_closure_sd_DL_shadow, - randb_closure_sd, - randb_closure_sd_DL_shadow, - ray_P_sd, - ray_P_sd_DL_shadow, - ray_dP_sd, - ray_dP_sd_DL_shadow, d_data, per_sample_output_buffers, d_rng_state, @@ -2800,9 +2454,10 @@ public: PathRadiance_coop, Ray_coop, PathState_coop, + Intersection_coop_shadow, ray_state); -/* TODO(segrey): Avoid map lookup here. */ +/* TODO(sergey): Avoid map lookup here. */ #define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_mem(ckPathTraceKernel_data_init, &start_arg_index, #name); #include "kernel_textures.h" @@ -2859,7 +2514,6 @@ public: 0, kgbuffer, d_data, - sd, throughput_coop, PathRadiance_coop, Ray_coop, @@ -2885,7 +2539,6 @@ public: 0, kgbuffer, d_data, - sd, per_sample_output_buffers, d_rng_state, rng_coop, @@ -2967,7 +2620,6 @@ public: kgbuffer, d_data, sd, - sd_DL_shadow, rng_coop, PathState_coop, ISLamp_coop, @@ -2982,17 +2634,13 @@ public: 0, kgbuffer, d_data, - sd_DL_shadow, PathState_coop, LightRay_coop, AOLightRay_coop, - Intersection_coop_AO, - Intersection_coop_DL, ray_state, Queue_data, Queue_index, - dQueue_size, - total_num_rays); + dQueue_size); kernel_set_args(ckPathTraceKernel_next_iteration_setup, 0, @@ -3162,16 +2810,12 @@ public: { size_t total_invariable_mem_allocated = 0; size_t KernelGlobals_size = 0; - size_t ShaderData_SOA_size = 0; KernelGlobals_size = get_KernelGlobals_size(); - ShaderData_SOA_size = get_shaderdata_soa_size(); total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */ total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */ total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */ - total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */ - total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_DL_shadow size */ return total_invariable_mem_allocated; } @@ -3238,13 +2882,11 @@ public: /* Calculate the memory required for one thread in split kernel. */ size_t get_per_thread_memory() { - size_t shader_closure_size = 0; - size_t shaderdata_volume = 0; - shader_closure_size = get_shader_closure_size(current_max_closure); + size_t shaderdata_size = 0; /* TODO(sergey): This will actually over-allocate if * particular kernel does not support multiclosure. */ - shaderdata_volume = get_shader_data_size(shader_closure_size); + shaderdata_size = get_shader_data_size(current_max_closure); size_t retval = sizeof(RNG) + sizeof(float3) /* Throughput size */ + sizeof(float) /* L transparent size */ @@ -3255,8 +2897,8 @@ public: + sizeof(Intersection) /* Overall isect */ + sizeof(Intersection) /* Instersection_coop_AO */ + sizeof(Intersection) /* Intersection coop DL */ - + shaderdata_volume /* Overall ShaderData */ - + (shaderdata_volume * 2) /* ShaderData : DL and shadow */ + + shaderdata_size /* Overall ShaderData */ + + (shaderdata_size * 2) /* ShaderData : DL and shadow */ + sizeof(Ray) + sizeof(BsdfEval) + sizeof(float3) /* AOAlpha size */ + sizeof(float3) /* AOBSDF size */ @@ -3580,7 +3222,7 @@ protected: string build_options_for_base_program( const DeviceRequestedFeatures& requested_features) { - return build_options_from_requested_features(requested_features); + return requested_features.get_build_options(); } }; @@ -3690,7 +3332,7 @@ string device_opencl_capabilities(void) APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what) vector<cl_device_id> device_ids; - for (cl_uint platform = 0; platform < num_platforms; ++platform) { + for(cl_uint platform = 0; platform < num_platforms; ++platform) { cl_platform_id platform_id = platform_ids[platform]; result += string_printf("Platform #%u\n", platform); @@ -3715,7 +3357,7 @@ string device_opencl_capabilities(void) num_devices, &device_ids[0], NULL)); - for (cl_uint device = 0; device < num_devices; ++device) { + for(cl_uint device = 0; device < num_devices; ++device) { cl_device_id device_id = device_ids[device]; result += string_printf("\t\tDevice: #%u\n", device); |