Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_opencl.cpp')
-rw-r--r-- intern/cycles/device/device_opencl.cpp 524
1 files changed, 83 insertions, 441 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index c3392d27b2c..1b4e5421b5a 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -27,6 +27,7 @@
#include "buffers.h"
+#include "util_debug.h"
#include "util_foreach.h"
#include "util_logging.h"
#include "util_map.h"
@@ -84,29 +85,28 @@ namespace {
cl_device_type opencl_device_type()
{
- char *device = getenv("CYCLES_OPENCL_TEST");
-
- if(device) {
- if(strcmp(device, "NONE") == 0)
+ switch(DebugFlags().opencl.device_type)
+ {
+ case DebugFlags::OpenCL::DEVICE_NONE:
return 0;
- if(strcmp(device, "ALL") == 0)
+ case DebugFlags::OpenCL::DEVICE_ALL:
return CL_DEVICE_TYPE_ALL;
- else if(strcmp(device, "DEFAULT") == 0)
+ case DebugFlags::OpenCL::DEVICE_DEFAULT:
return CL_DEVICE_TYPE_DEFAULT;
- else if(strcmp(device, "CPU") == 0)
+ case DebugFlags::OpenCL::DEVICE_CPU:
return CL_DEVICE_TYPE_CPU;
- else if(strcmp(device, "GPU") == 0)
+ case DebugFlags::OpenCL::DEVICE_GPU:
return CL_DEVICE_TYPE_GPU;
- else if(strcmp(device, "ACCELERATOR") == 0)
+ case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
return CL_DEVICE_TYPE_ACCELERATOR;
+ default:
+ return CL_DEVICE_TYPE_ALL;
}
-
- return CL_DEVICE_TYPE_ALL;
}
-bool opencl_kernel_use_debug()
+inline bool opencl_kernel_use_debug()
{
- return (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+ return DebugFlags().opencl.debug;
}
bool opencl_kernel_use_advanced_shading(const string& platform)
@@ -129,9 +129,14 @@ bool opencl_kernel_use_advanced_shading(const string& platform)
bool opencl_kernel_use_split(const string& platform_name,
const cl_device_type device_type)
{
- if(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL) {
+ if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
+ VLOG(1) << "Forcing split kernel to use.";
return true;
}
+ if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
+ VLOG(1) << "Forcing mega kernel to use.";
+ return false;
+ }
/* TODO(sergey): Replace string lookups with more enum-like API,
* similar to device/vendor checks blender's gpu.
*/
@@ -224,8 +229,7 @@ bool opencl_device_version_check(cl_device_id device,
void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
{
const bool force_all_platforms =
- (getenv("CYCLES_OPENCL_TEST") != NULL) ||
- (getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST")) != NULL;
+ (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
const cl_device_type device_type = opencl_device_type();
static bool first_time = true;
#define FIRST_VLOG(severity) if(first_time) VLOG(severity)
@@ -313,7 +317,7 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
continue;
}
if(!opencl_device_version_check(device_id)) {
- FIRST_VLOG(2) << "Ignoting device " << device_name
+ FIRST_VLOG(2) << "Ignoring device " << device_name
<< " due to old compiler version.";
continue;
}
@@ -327,8 +331,8 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
&device_type,
NULL) != CL_SUCCESS)
{
- FIRST_VLOG(2) << "Ignoting device " << device_name
- << ", faield to fetch device type.";
+ FIRST_VLOG(2) << "Ignoring device " << device_name
+ << ", failed to fetch device type.";
continue;
}
FIRST_VLOG(2) << "Adding new device " << device_name << ".";
@@ -339,7 +343,7 @@ void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
device_name));
}
else {
- FIRST_VLOG(2) << "Ignoting device " << device_name
+ FIRST_VLOG(2) << "Ignoring device " << device_name
<< ", not officially supported yet.";
}
}
@@ -581,7 +585,7 @@ public:
ProgramName program_name,
thread_scoped_lock& slot_locker)
{
- switch (program_name) {
+ switch(program_name) {
case OCL_DEV_BASE_PROGRAM:
store_something<cl_program>(platform,
device,
@@ -990,7 +994,8 @@ public:
if(path_exists(clbin) && load_binary(kernel_path,
clbin,
build_flags,
- &cpProgram)) {
+ &cpProgram))
+ {
/* Kernel loaded from binary, nothing to do. */
VLOG(2) << "Loaded kernel from " << clbin << ".";
}
@@ -1110,7 +1115,7 @@ public:
{
/* this is blocking */
size_t size = mem.memory_size();
- if(size != 0){
+ if(size != 0) {
opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
CL_MEM_PTR(mem.device_pointer),
CL_TRUE,
@@ -1299,7 +1304,9 @@ public:
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_input = CL_MEM_PTR(task.shader_input);
cl_mem d_output = CL_MEM_PTR(task.shader_output);
+ cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
cl_int d_shader_eval_type = task.shader_eval_type;
+ cl_int d_shader_filter = task.shader_filter;
cl_int d_shader_x = task.shader_x;
cl_int d_shader_w = task.shader_w;
cl_int d_offset = task.offset;
@@ -1325,14 +1332,27 @@ public:
d_input,
d_output);
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_output_luma);
+ }
+
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &start_arg_index, #name);
+ set_kernel_arg_mem(kernel, &start_arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += kernel_set_args(kernel,
start_arg_index,
- d_shader_eval_type,
+ d_shader_eval_type);
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_filter);
+ }
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
d_shader_x,
d_shader_w,
d_offset,
@@ -1380,7 +1400,7 @@ public:
protected:
string kernel_build_options(const string *debug_src = NULL)
{
- string build_options = " -cl-fast-relaxed-math ";
+ string build_options = "-cl-fast-relaxed-math ";
if(platform_name == "NVIDIA CUDA") {
build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
@@ -1430,7 +1450,15 @@ protected:
template <typename T>
ArgumentWrapper(T& argument) : size(sizeof(argument)),
pointer(&argument) { }
+ ArgumentWrapper(int argument) : size(sizeof(int)),
+ int_value(argument),
+ pointer(&int_value) { }
+ ArgumentWrapper(float argument) : size(sizeof(float)),
+ float_value(argument),
+ pointer(&float_value) { }
size_t size;
+ int int_value;
+ float float_value;
void *pointer;
};
@@ -1545,34 +1573,6 @@ protected:
}
}
- string build_options_from_requested_features(
- const DeviceRequestedFeatures& requested_features)
- {
- string build_options = "";
- if(requested_features.experimental) {
- build_options += " -D__KERNEL_EXPERIMENTAL__";
- }
- build_options += " -D__NODES_MAX_GROUP__=" +
- string_printf("%d", requested_features.max_nodes_group);
- build_options += " -D__NODES_FEATURES__=" +
- string_printf("%d", requested_features.nodes_features);
- build_options += string_printf(" -D__MAX_CLOSURE__=%d",
- requested_features.max_closure);
- if(!requested_features.use_hair) {
- build_options += " -D__NO_HAIR__";
- }
- if(!requested_features.use_object_motion) {
- build_options += " -D__NO_OBJECT_MOTION__";
- }
- if(!requested_features.use_camera_motion) {
- build_options += " -D__NO_CAMERA_MOTION__";
- }
- if(!requested_features.use_baking) {
- build_options += " -D__NO_BAKING__";
- }
- return build_options;
- }
-
/* ** Those guys are for working around some compiler-specific bugs ** */
virtual cl_program load_cached_kernel(
@@ -1673,7 +1673,8 @@ public:
clbin,
custom_kernel_build_options,
&path_trace_program,
- debug_src)) {
+ debug_src))
+ {
/* Kernel loaded from binary, nothing to do. */
}
else {
@@ -1909,63 +1910,6 @@ public:
* shadow_blocked kernel.
*/
- /* Global buffers of each member of ShaderData. */
- cl_mem P_sd;
- cl_mem P_sd_DL_shadow;
- cl_mem N_sd;
- cl_mem N_sd_DL_shadow;
- cl_mem Ng_sd;
- cl_mem Ng_sd_DL_shadow;
- cl_mem I_sd;
- cl_mem I_sd_DL_shadow;
- cl_mem shader_sd;
- cl_mem shader_sd_DL_shadow;
- cl_mem flag_sd;
- cl_mem flag_sd_DL_shadow;
- cl_mem prim_sd;
- cl_mem prim_sd_DL_shadow;
- cl_mem type_sd;
- cl_mem type_sd_DL_shadow;
- cl_mem u_sd;
- cl_mem u_sd_DL_shadow;
- cl_mem v_sd;
- cl_mem v_sd_DL_shadow;
- cl_mem object_sd;
- cl_mem object_sd_DL_shadow;
- cl_mem time_sd;
- cl_mem time_sd_DL_shadow;
- cl_mem ray_length_sd;
- cl_mem ray_length_sd_DL_shadow;
- cl_mem ray_depth_sd;
- cl_mem ray_depth_sd_DL_shadow;
- cl_mem transparent_depth_sd;
- cl_mem transparent_depth_sd_DL_shadow;
-
- /* Ray differentials. */
- cl_mem dP_sd, dI_sd;
- cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
- cl_mem du_sd, dv_sd;
- cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
-
- /* Dp/Du */
- cl_mem dPdu_sd, dPdv_sd;
- cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
-
- /* Object motion. */
- cl_mem ob_tfm_sd, ob_itfm_sd;
- cl_mem ob_tfm_sd_DL_shadow, ob_itfm_sd_DL_shadow;
-
- cl_mem closure_sd;
- cl_mem closure_sd_DL_shadow;
- cl_mem num_closure_sd;
- cl_mem num_closure_sd_DL_shadow;
- cl_mem randb_closure_sd;
- cl_mem randb_closure_sd_DL_shadow;
- cl_mem ray_P_sd;
- cl_mem ray_P_sd_DL_shadow;
- cl_mem ray_dP_sd;
- cl_mem ray_dP_sd_DL_shadow;
-
/* Global memory required for shadow blocked and accum_radiance. */
cl_mem BSDFEval_coop;
cl_mem ISLamp_coop;
@@ -1973,8 +1917,7 @@ public:
cl_mem AOAlpha_coop;
cl_mem AOBSDF_coop;
cl_mem AOLightRay_coop;
- cl_mem Intersection_coop_AO;
- cl_mem Intersection_coop_DL;
+ cl_mem Intersection_coop_shadow;
#ifdef WITH_CYCLES_DEBUG
/* DebugData memory */
@@ -2063,70 +2006,6 @@ public:
sd = NULL;
sd_DL_shadow = NULL;
- P_sd = NULL;
- P_sd_DL_shadow = NULL;
- N_sd = NULL;
- N_sd_DL_shadow = NULL;
- Ng_sd = NULL;
- Ng_sd_DL_shadow = NULL;
- I_sd = NULL;
- I_sd_DL_shadow = NULL;
- shader_sd = NULL;
- shader_sd_DL_shadow = NULL;
- flag_sd = NULL;
- flag_sd_DL_shadow = NULL;
- prim_sd = NULL;
- prim_sd_DL_shadow = NULL;
- type_sd = NULL;
- type_sd_DL_shadow = NULL;
- u_sd = NULL;
- u_sd_DL_shadow = NULL;
- v_sd = NULL;
- v_sd_DL_shadow = NULL;
- object_sd = NULL;
- object_sd_DL_shadow = NULL;
- time_sd = NULL;
- time_sd_DL_shadow = NULL;
- ray_length_sd = NULL;
- ray_length_sd_DL_shadow = NULL;
- ray_depth_sd = NULL;
- ray_depth_sd_DL_shadow = NULL;
- transparent_depth_sd = NULL;
- transparent_depth_sd_DL_shadow = NULL;
-
- /* Ray differentials. */
- dP_sd = NULL;
- dI_sd = NULL;
- dP_sd_DL_shadow = NULL;
- dI_sd_DL_shadow = NULL;
- du_sd = NULL;
- dv_sd = NULL;
- du_sd_DL_shadow = NULL;
- dv_sd_DL_shadow = NULL;
-
- /* Dp/Du */
- dPdu_sd = NULL;
- dPdv_sd = NULL;
- dPdu_sd_DL_shadow = NULL;
- dPdv_sd_DL_shadow = NULL;
-
- /* Object motion. */
- ob_tfm_sd = NULL;
- ob_itfm_sd = NULL;
- ob_tfm_sd_DL_shadow = NULL;
- ob_itfm_sd_DL_shadow = NULL;
-
- closure_sd = NULL;
- closure_sd_DL_shadow = NULL;
- num_closure_sd = NULL;
- num_closure_sd_DL_shadow = NULL;
- randb_closure_sd = NULL;
- randb_closure_sd_DL_shadow = NULL;
- ray_P_sd = NULL;
- ray_P_sd_DL_shadow = NULL;
- ray_dP_sd = NULL;
- ray_dP_sd_DL_shadow = NULL;
-
rng_coop = NULL;
throughput_coop = NULL;
L_transparent_coop = NULL;
@@ -2142,8 +2021,7 @@ public:
BSDFEval_coop = NULL;
ISLamp_coop = NULL;
LightRay_coop = NULL;
- Intersection_coop_AO = NULL;
- Intersection_coop_DL = NULL;
+ Intersection_coop_shadow = NULL;
#ifdef WITH_CYCLES_DEBUG
debugdata_coop = NULL;
@@ -2204,7 +2082,8 @@ public:
clbin,
custom_kernel_build_options,
program,
- debug_src)) {
+ debug_src))
+ {
/* Kernel loaded from binary. */
}
else {
@@ -2243,17 +2122,10 @@ public:
return ret_size;
}
- size_t get_shader_closure_size(int max_closure)
+ size_t get_shader_data_size(size_t max_closure)
{
- return (sizeof(ShaderClosure) * max_closure);
- }
-
- size_t get_shader_data_size(size_t shader_closure_size)
- {
- /* ShaderData size without accounting for ShaderClosure array. */
- size_t shader_data_size =
- sizeof(ShaderData) - (sizeof(ShaderClosure) * MAX_CLOSURE);
- return (shader_data_size + shader_closure_size);
+ /* ShaderData size with variable size ShaderClosure array */
+ return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure));
}
/* Returns size of KernelGlobals structure associated with OpenCL. */
@@ -2268,25 +2140,13 @@ public:
ccl_global type *name;
#include "kernel_textures.h"
#undef KERNEL_TEX
+ void *sd_input;
+ void *isect_shadow;
} KernelGlobals;
return sizeof(KernelGlobals);
}
- /* Returns size of Structure of arrays implementation of. */
- size_t get_shaderdata_soa_size()
- {
- size_t shader_soa_size = 0;
-
-#define SD_VAR(type, what) shader_soa_size += sizeof(void *);
-#define SD_CLOSURE_VAR(type, what, max_closure) shader_soa_size += sizeof(void *);
- #include "kernel_shaderdata_vars.h"
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
-
- return shader_soa_size;
- }
-
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
/* Get Shader, bake and film_convert kernels.
@@ -2303,11 +2163,11 @@ public:
string clbin;
string clsrc, *debug_src = NULL;
- string build_options = "-D__SPLIT_KERNEL__";
+ string build_options = "-D__SPLIT_KERNEL__ ";
#ifdef __WORK_STEALING__
- build_options += " -D__WORK_STEALING__";
+ build_options += "-D__WORK_STEALING__ ";
#endif
- build_options += build_options_from_requested_features(requested_features);
+ build_options += requested_features.get_build_options();
/* Set compute device build option. */
cl_device_type device_type;
@@ -2407,70 +2267,6 @@ public:
release_kernel_safe(ckPathTraceKernel_sum_all_radiance);
/* Release global memory */
- release_mem_object_safe(P_sd);
- release_mem_object_safe(P_sd_DL_shadow);
- release_mem_object_safe(N_sd);
- release_mem_object_safe(N_sd_DL_shadow);
- release_mem_object_safe(Ng_sd);
- release_mem_object_safe(Ng_sd_DL_shadow);
- release_mem_object_safe(I_sd);
- release_mem_object_safe(I_sd_DL_shadow);
- release_mem_object_safe(shader_sd);
- release_mem_object_safe(shader_sd_DL_shadow);
- release_mem_object_safe(flag_sd);
- release_mem_object_safe(flag_sd_DL_shadow);
- release_mem_object_safe(prim_sd);
- release_mem_object_safe(prim_sd_DL_shadow);
- release_mem_object_safe(type_sd);
- release_mem_object_safe(type_sd_DL_shadow);
- release_mem_object_safe(u_sd);
- release_mem_object_safe(u_sd_DL_shadow);
- release_mem_object_safe(v_sd);
- release_mem_object_safe(v_sd_DL_shadow);
- release_mem_object_safe(object_sd);
- release_mem_object_safe(object_sd_DL_shadow);
- release_mem_object_safe(time_sd);
- release_mem_object_safe(time_sd_DL_shadow);
- release_mem_object_safe(ray_length_sd);
- release_mem_object_safe(ray_length_sd_DL_shadow);
- release_mem_object_safe(ray_depth_sd);
- release_mem_object_safe(ray_depth_sd_DL_shadow);
- release_mem_object_safe(transparent_depth_sd);
- release_mem_object_safe(transparent_depth_sd_DL_shadow);
-
- /* Ray differentials. */
- release_mem_object_safe(dP_sd);
- release_mem_object_safe(dP_sd_DL_shadow);
- release_mem_object_safe(dI_sd);
- release_mem_object_safe(dI_sd_DL_shadow);
- release_mem_object_safe(du_sd);
- release_mem_object_safe(du_sd_DL_shadow);
- release_mem_object_safe(dv_sd);
- release_mem_object_safe(dv_sd_DL_shadow);
-
- /* Dp/Du */
- release_mem_object_safe(dPdu_sd);
- release_mem_object_safe(dPdu_sd_DL_shadow);
- release_mem_object_safe(dPdv_sd);
- release_mem_object_safe(dPdv_sd_DL_shadow);
-
- /* Object motion. */
- release_mem_object_safe(ob_tfm_sd);
- release_mem_object_safe(ob_itfm_sd);
-
- release_mem_object_safe(ob_tfm_sd_DL_shadow);
- release_mem_object_safe(ob_itfm_sd_DL_shadow);
-
- release_mem_object_safe(closure_sd);
- release_mem_object_safe(closure_sd_DL_shadow);
- release_mem_object_safe(num_closure_sd);
- release_mem_object_safe(num_closure_sd_DL_shadow);
- release_mem_object_safe(randb_closure_sd);
- release_mem_object_safe(randb_closure_sd_DL_shadow);
- release_mem_object_safe(ray_P_sd);
- release_mem_object_safe(ray_P_sd_DL_shadow);
- release_mem_object_safe(ray_dP_sd);
- release_mem_object_safe(ray_dP_sd_DL_shadow);
release_mem_object_safe(rng_coop);
release_mem_object_safe(throughput_coop);
release_mem_object_safe(L_transparent_coop);
@@ -2488,8 +2284,7 @@ public:
release_mem_object_safe(BSDFEval_coop);
release_mem_object_safe(ISLamp_coop);
release_mem_object_safe(LightRay_coop);
- release_mem_object_safe(Intersection_coop_AO);
- release_mem_object_safe(Intersection_coop_DL);
+ release_mem_object_safe(Intersection_coop_shadow);
#ifdef WITH_CYCLES_DEBUG
release_mem_object_safe(debugdata_coop);
#endif
@@ -2586,7 +2381,7 @@ public:
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- size_t ShaderClosure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = get_shader_data_size(current_max_closure);
#ifdef __WORK_STEALING__
/* Calculate max groups */
@@ -2607,71 +2402,8 @@ public:
kgbuffer = mem_alloc(get_KernelGlobals_size());
/* Create global buffers for ShaderData. */
- sd = mem_alloc(get_shaderdata_soa_size());
- sd_DL_shadow = mem_alloc(get_shaderdata_soa_size());
- P_sd = mem_alloc(num_global_elements * sizeof(float3));
- P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- N_sd = mem_alloc(num_global_elements * sizeof(float3));
- N_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- Ng_sd = mem_alloc(num_global_elements * sizeof(float3));
- Ng_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- I_sd = mem_alloc(num_global_elements * sizeof(float3));
- I_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- shader_sd = mem_alloc(num_global_elements * sizeof(int));
- shader_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- flag_sd = mem_alloc(num_global_elements * sizeof(int));
- flag_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- prim_sd = mem_alloc(num_global_elements * sizeof(int));
- prim_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- type_sd = mem_alloc(num_global_elements * sizeof(int));
- type_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- u_sd = mem_alloc(num_global_elements * sizeof(float));
- u_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- v_sd = mem_alloc(num_global_elements * sizeof(float));
- v_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- object_sd = mem_alloc(num_global_elements * sizeof(int));
- object_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- time_sd = mem_alloc(num_global_elements * sizeof(float));
- time_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_length_sd = mem_alloc(num_global_elements * sizeof(float));
- ray_length_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_depth_sd = mem_alloc(num_global_elements * sizeof(int));
- ray_depth_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- transparent_depth_sd = mem_alloc(num_global_elements * sizeof(int));
- transparent_depth_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
-
- /* Ray differentials. */
- dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- dI_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dI_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- du_sd = mem_alloc(num_global_elements * sizeof(differential));
- du_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
- dv_sd = mem_alloc(num_global_elements * sizeof(differential));
- dv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
-
- /* Dp/Du */
- dPdu_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdu_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- dPdv_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
-
- /* Object motion. */
- ob_tfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_tfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
- ob_itfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_itfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
-
- closure_sd = mem_alloc(num_global_elements * ShaderClosure_size);
- closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * ShaderClosure_size);
- num_closure_sd = mem_alloc(num_global_elements * sizeof(int));
- num_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- randb_closure_sd = mem_alloc(num_global_elements * sizeof(float));
- randb_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_P_sd = mem_alloc(num_global_elements * sizeof(float3));
- ray_P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- ray_dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- ray_dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
+ sd = mem_alloc(num_global_elements * shaderdata_size);
+ sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size);
/* Creation of global memory buffers which are shared among
* the kernels.
@@ -2689,8 +2421,7 @@ public:
BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval));
ISLamp_coop = mem_alloc(num_global_elements * sizeof(int));
LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
- Intersection_coop_AO = mem_alloc(num_global_elements * sizeof(Intersection));
- Intersection_coop_DL = mem_alloc(num_global_elements * sizeof(Intersection));
+ Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection));
#ifdef WITH_CYCLES_DEBUG
debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData));
@@ -2708,89 +2439,12 @@ public:
}
cl_int dQueue_size = global_size[0] * global_size[1];
- cl_int total_num_rays = global_size[0] * global_size[1];
cl_uint start_arg_index =
kernel_set_args(ckPathTraceKernel_data_init,
0,
kgbuffer,
- sd,
sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow,
- ray_depth_sd,
- ray_depth_sd_DL_shadow,
- transparent_depth_sd,
- transparent_depth_sd_DL_shadow);
-
- /* Ray differentials. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow);
-
- /* Dp/Du */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow);
-
- /* Object motion. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow);
-
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
d_data,
per_sample_output_buffers,
d_rng_state,
@@ -2800,9 +2454,10 @@ public:
PathRadiance_coop,
Ray_coop,
PathState_coop,
+ Intersection_coop_shadow,
ray_state);
-/* TODO(segrey): Avoid map lookup here. */
+/* TODO(sergey): Avoid map lookup here. */
#define KERNEL_TEX(type, ttype, name) \
set_kernel_arg_mem(ckPathTraceKernel_data_init, &start_arg_index, #name);
#include "kernel_textures.h"
@@ -2859,7 +2514,6 @@ public:
0,
kgbuffer,
d_data,
- sd,
throughput_coop,
PathRadiance_coop,
Ray_coop,
@@ -2885,7 +2539,6 @@ public:
0,
kgbuffer,
d_data,
- sd,
per_sample_output_buffers,
d_rng_state,
rng_coop,
@@ -2967,7 +2620,6 @@ public:
kgbuffer,
d_data,
sd,
- sd_DL_shadow,
rng_coop,
PathState_coop,
ISLamp_coop,
@@ -2982,17 +2634,13 @@ public:
0,
kgbuffer,
d_data,
- sd_DL_shadow,
PathState_coop,
LightRay_coop,
AOLightRay_coop,
- Intersection_coop_AO,
- Intersection_coop_DL,
ray_state,
Queue_data,
Queue_index,
- dQueue_size,
- total_num_rays);
+ dQueue_size);
kernel_set_args(ckPathTraceKernel_next_iteration_setup,
0,
@@ -3162,16 +2810,12 @@ public:
{
size_t total_invariable_mem_allocated = 0;
size_t KernelGlobals_size = 0;
- size_t ShaderData_SOA_size = 0;
KernelGlobals_size = get_KernelGlobals_size();
- ShaderData_SOA_size = get_shaderdata_soa_size();
total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_DL_shadow size */
return total_invariable_mem_allocated;
}
@@ -3238,13 +2882,11 @@ public:
/* Calculate the memory required for one thread in split kernel. */
size_t get_per_thread_memory()
{
- size_t shader_closure_size = 0;
- size_t shaderdata_volume = 0;
- shader_closure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = 0;
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- shaderdata_volume = get_shader_data_size(shader_closure_size);
+ shaderdata_size = get_shader_data_size(current_max_closure);
size_t retval = sizeof(RNG)
+ sizeof(float3) /* Throughput size */
+ sizeof(float) /* L transparent size */
@@ -3255,8 +2897,8 @@ public:
+ sizeof(Intersection) /* Overall isect */
+ sizeof(Intersection) /* Intersection_coop_AO */
+ sizeof(Intersection) /* Intersection coop DL */
- + shaderdata_volume /* Overall ShaderData */
- + (shaderdata_volume * 2) /* ShaderData : DL and shadow */
+ + shaderdata_size /* Overall ShaderData */
+ + (shaderdata_size * 2) /* ShaderData : DL and shadow */
+ sizeof(Ray) + sizeof(BsdfEval)
+ sizeof(float3) /* AOAlpha size */
+ sizeof(float3) /* AOBSDF size */
@@ -3580,7 +3222,7 @@ protected:
string build_options_for_base_program(
const DeviceRequestedFeatures& requested_features)
{
- return build_options_from_requested_features(requested_features);
+ return requested_features.get_build_options();
}
};
@@ -3690,7 +3332,7 @@ string device_opencl_capabilities(void)
APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)
vector<cl_device_id> device_ids;
- for (cl_uint platform = 0; platform < num_platforms; ++platform) {
+ for(cl_uint platform = 0; platform < num_platforms; ++platform) {
cl_platform_id platform_id = platform_ids[platform];
result += string_printf("Platform #%u\n", platform);
@@ -3715,7 +3357,7 @@ string device_opencl_capabilities(void)
num_devices,
&device_ids[0],
NULL));
- for (cl_uint device = 0; device < num_devices; ++device) {
+ for(cl_uint device = 0; device < num_devices; ++device) {
cl_device_id device_id = device_ids[device];
result += string_printf("\t\tDevice: #%u\n", device);