Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-10-29 18:56:27 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-01-30 02:23:06 +0300
commit9815f8a623d47c9a52aac8ac3a2bcc17d1c74b5e (patch)
treec42a1bbdd4f070802932bce88c11be8c8732b930
parentfef53c74b5520fe6404d581a3c15fad4177f29b7 (diff)
Cycles: Cleanup of OpenCL split kernel routines
The idea is to switch from allocating separate buffers for shader data's structure of arrays to allocating one huge memory block and do some index trickery to make it accessed as SOA. This saves quite reasonable amount of lines of code in device_opencl and also makes it possible to get rid of special declaration of ShaderData structure. As a side effect it also makes it easier to experiment with SOA vs. AOS for split kernel. Works fine here on NVidia GTX580, Intel CPU amd AMD Fiji cards. Reviewers: #cycles, brecht, juicyfruit, dingto Differential Revision: https://developer.blender.org/D1593
-rw-r--r--intern/cycles/device/device_opencl.cpp351
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernel_shaderdata_vars.h94
-rw-r--r--intern/cycles/kernel/kernel_shadow.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h97
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl147
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h166
7 files changed, 100 insertions, 758 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index e612b9f581f..c41da549c61 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1908,59 +1908,6 @@ public:
* shadow_blocked kernel.
*/
- /* Global buffers of each member of ShaderData. */
- cl_mem P_sd;
- cl_mem P_sd_DL_shadow;
- cl_mem N_sd;
- cl_mem N_sd_DL_shadow;
- cl_mem Ng_sd;
- cl_mem Ng_sd_DL_shadow;
- cl_mem I_sd;
- cl_mem I_sd_DL_shadow;
- cl_mem shader_sd;
- cl_mem shader_sd_DL_shadow;
- cl_mem flag_sd;
- cl_mem flag_sd_DL_shadow;
- cl_mem prim_sd;
- cl_mem prim_sd_DL_shadow;
- cl_mem type_sd;
- cl_mem type_sd_DL_shadow;
- cl_mem u_sd;
- cl_mem u_sd_DL_shadow;
- cl_mem v_sd;
- cl_mem v_sd_DL_shadow;
- cl_mem object_sd;
- cl_mem object_sd_DL_shadow;
- cl_mem time_sd;
- cl_mem time_sd_DL_shadow;
- cl_mem ray_length_sd;
- cl_mem ray_length_sd_DL_shadow;
-
- /* Ray differentials. */
- cl_mem dP_sd, dI_sd;
- cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
- cl_mem du_sd, dv_sd;
- cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
-
- /* Dp/Du */
- cl_mem dPdu_sd, dPdv_sd;
- cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
-
- /* Object motion. */
- cl_mem ob_tfm_sd, ob_itfm_sd;
- cl_mem ob_tfm_sd_DL_shadow, ob_itfm_sd_DL_shadow;
-
- cl_mem closure_sd;
- cl_mem closure_sd_DL_shadow;
- cl_mem num_closure_sd;
- cl_mem num_closure_sd_DL_shadow;
- cl_mem randb_closure_sd;
- cl_mem randb_closure_sd_DL_shadow;
- cl_mem ray_P_sd;
- cl_mem ray_P_sd_DL_shadow;
- cl_mem ray_dP_sd;
- cl_mem ray_dP_sd_DL_shadow;
-
/* Global memory required for shadow blocked and accum_radiance. */
cl_mem BSDFEval_coop;
cl_mem ISLamp_coop;
@@ -2057,66 +2004,6 @@ public:
sd = NULL;
sd_DL_shadow = NULL;
- P_sd = NULL;
- P_sd_DL_shadow = NULL;
- N_sd = NULL;
- N_sd_DL_shadow = NULL;
- Ng_sd = NULL;
- Ng_sd_DL_shadow = NULL;
- I_sd = NULL;
- I_sd_DL_shadow = NULL;
- shader_sd = NULL;
- shader_sd_DL_shadow = NULL;
- flag_sd = NULL;
- flag_sd_DL_shadow = NULL;
- prim_sd = NULL;
- prim_sd_DL_shadow = NULL;
- type_sd = NULL;
- type_sd_DL_shadow = NULL;
- u_sd = NULL;
- u_sd_DL_shadow = NULL;
- v_sd = NULL;
- v_sd_DL_shadow = NULL;
- object_sd = NULL;
- object_sd_DL_shadow = NULL;
- time_sd = NULL;
- time_sd_DL_shadow = NULL;
- ray_length_sd = NULL;
- ray_length_sd_DL_shadow = NULL;
-
- /* Ray differentials. */
- dP_sd = NULL;
- dI_sd = NULL;
- dP_sd_DL_shadow = NULL;
- dI_sd_DL_shadow = NULL;
- du_sd = NULL;
- dv_sd = NULL;
- du_sd_DL_shadow = NULL;
- dv_sd_DL_shadow = NULL;
-
- /* Dp/Du */
- dPdu_sd = NULL;
- dPdv_sd = NULL;
- dPdu_sd_DL_shadow = NULL;
- dPdv_sd_DL_shadow = NULL;
-
- /* Object motion. */
- ob_tfm_sd = NULL;
- ob_itfm_sd = NULL;
- ob_tfm_sd_DL_shadow = NULL;
- ob_itfm_sd_DL_shadow = NULL;
-
- closure_sd = NULL;
- closure_sd_DL_shadow = NULL;
- num_closure_sd = NULL;
- num_closure_sd_DL_shadow = NULL;
- randb_closure_sd = NULL;
- randb_closure_sd_DL_shadow = NULL;
- ray_P_sd = NULL;
- ray_P_sd_DL_shadow = NULL;
- ray_dP_sd = NULL;
- ray_dP_sd_DL_shadow = NULL;
-
rng_coop = NULL;
throughput_coop = NULL;
L_transparent_coop = NULL;
@@ -2232,17 +2119,10 @@ public:
return ret_size;
}
- size_t get_shader_closure_size(int max_closure)
- {
- return (sizeof(ShaderClosure) * max_closure);
- }
-
- size_t get_shader_data_size(size_t shader_closure_size)
+ size_t get_shader_data_size(size_t max_closure)
{
- /* ShaderData size without accounting for ShaderClosure array. */
- size_t shader_data_size =
- sizeof(ShaderData) - (sizeof(ShaderClosure) * MAX_CLOSURE);
- return (shader_data_size + shader_closure_size);
+ /* ShaderData size with variable size ShaderClosure array */
+ return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure));
}
/* Returns size of KernelGlobals structure associated with OpenCL. */
@@ -2264,20 +2144,6 @@ public:
return sizeof(KernelGlobals);
}
- /* Returns size of Structure of arrays implementation of. */
- size_t get_shaderdata_soa_size()
- {
- size_t shader_soa_size = 0;
-
-#define SD_VAR(type, what) shader_soa_size += sizeof(void *);
-#define SD_CLOSURE_VAR(type, what, max_closure) shader_soa_size += sizeof(void *);
- #include "kernel_shaderdata_vars.h"
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
-
- return shader_soa_size;
- }
-
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
/* Get Shader, bake and film_convert kernels.
@@ -2398,66 +2264,6 @@ public:
release_kernel_safe(ckPathTraceKernel_sum_all_radiance);
/* Release global memory */
- release_mem_object_safe(P_sd);
- release_mem_object_safe(P_sd_DL_shadow);
- release_mem_object_safe(N_sd);
- release_mem_object_safe(N_sd_DL_shadow);
- release_mem_object_safe(Ng_sd);
- release_mem_object_safe(Ng_sd_DL_shadow);
- release_mem_object_safe(I_sd);
- release_mem_object_safe(I_sd_DL_shadow);
- release_mem_object_safe(shader_sd);
- release_mem_object_safe(shader_sd_DL_shadow);
- release_mem_object_safe(flag_sd);
- release_mem_object_safe(flag_sd_DL_shadow);
- release_mem_object_safe(prim_sd);
- release_mem_object_safe(prim_sd_DL_shadow);
- release_mem_object_safe(type_sd);
- release_mem_object_safe(type_sd_DL_shadow);
- release_mem_object_safe(u_sd);
- release_mem_object_safe(u_sd_DL_shadow);
- release_mem_object_safe(v_sd);
- release_mem_object_safe(v_sd_DL_shadow);
- release_mem_object_safe(object_sd);
- release_mem_object_safe(object_sd_DL_shadow);
- release_mem_object_safe(time_sd);
- release_mem_object_safe(time_sd_DL_shadow);
- release_mem_object_safe(ray_length_sd);
- release_mem_object_safe(ray_length_sd_DL_shadow);
-
- /* Ray differentials. */
- release_mem_object_safe(dP_sd);
- release_mem_object_safe(dP_sd_DL_shadow);
- release_mem_object_safe(dI_sd);
- release_mem_object_safe(dI_sd_DL_shadow);
- release_mem_object_safe(du_sd);
- release_mem_object_safe(du_sd_DL_shadow);
- release_mem_object_safe(dv_sd);
- release_mem_object_safe(dv_sd_DL_shadow);
-
- /* Dp/Du */
- release_mem_object_safe(dPdu_sd);
- release_mem_object_safe(dPdu_sd_DL_shadow);
- release_mem_object_safe(dPdv_sd);
- release_mem_object_safe(dPdv_sd_DL_shadow);
-
- /* Object motion. */
- release_mem_object_safe(ob_tfm_sd);
- release_mem_object_safe(ob_itfm_sd);
-
- release_mem_object_safe(ob_tfm_sd_DL_shadow);
- release_mem_object_safe(ob_itfm_sd_DL_shadow);
-
- release_mem_object_safe(closure_sd);
- release_mem_object_safe(closure_sd_DL_shadow);
- release_mem_object_safe(num_closure_sd);
- release_mem_object_safe(num_closure_sd_DL_shadow);
- release_mem_object_safe(randb_closure_sd);
- release_mem_object_safe(randb_closure_sd_DL_shadow);
- release_mem_object_safe(ray_P_sd);
- release_mem_object_safe(ray_P_sd_DL_shadow);
- release_mem_object_safe(ray_dP_sd);
- release_mem_object_safe(ray_dP_sd_DL_shadow);
release_mem_object_safe(rng_coop);
release_mem_object_safe(throughput_coop);
release_mem_object_safe(L_transparent_coop);
@@ -2572,7 +2378,7 @@ public:
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- size_t ShaderClosure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = get_shader_data_size(current_max_closure);
#ifdef __WORK_STEALING__
/* Calculate max groups */
@@ -2593,67 +2399,8 @@ public:
kgbuffer = mem_alloc(get_KernelGlobals_size());
/* Create global buffers for ShaderData. */
- sd = mem_alloc(get_shaderdata_soa_size());
- sd_DL_shadow = mem_alloc(get_shaderdata_soa_size());
- P_sd = mem_alloc(num_global_elements * sizeof(float3));
- P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- N_sd = mem_alloc(num_global_elements * sizeof(float3));
- N_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- Ng_sd = mem_alloc(num_global_elements * sizeof(float3));
- Ng_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- I_sd = mem_alloc(num_global_elements * sizeof(float3));
- I_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- shader_sd = mem_alloc(num_global_elements * sizeof(int));
- shader_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- flag_sd = mem_alloc(num_global_elements * sizeof(int));
- flag_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- prim_sd = mem_alloc(num_global_elements * sizeof(int));
- prim_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- type_sd = mem_alloc(num_global_elements * sizeof(int));
- type_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- u_sd = mem_alloc(num_global_elements * sizeof(float));
- u_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- v_sd = mem_alloc(num_global_elements * sizeof(float));
- v_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- object_sd = mem_alloc(num_global_elements * sizeof(int));
- object_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- time_sd = mem_alloc(num_global_elements * sizeof(float));
- time_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_length_sd = mem_alloc(num_global_elements * sizeof(float));
- ray_length_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
-
- /* Ray differentials. */
- dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- dI_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dI_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- du_sd = mem_alloc(num_global_elements * sizeof(differential));
- du_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
- dv_sd = mem_alloc(num_global_elements * sizeof(differential));
- dv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
-
- /* Dp/Du */
- dPdu_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdu_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- dPdv_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
-
- /* Object motion. */
- ob_tfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_tfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
- ob_itfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_itfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
-
- closure_sd = mem_alloc(num_global_elements * ShaderClosure_size);
- closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * ShaderClosure_size);
- num_closure_sd = mem_alloc(num_global_elements * sizeof(int));
- num_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- randb_closure_sd = mem_alloc(num_global_elements * sizeof(float));
- randb_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_P_sd = mem_alloc(num_global_elements * sizeof(float3));
- ray_P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- ray_dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- ray_dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
+ sd = mem_alloc(num_global_elements * shaderdata_size);
+ sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size);
/* Creation of global memory buffers which are shared among
* the kernels.
@@ -2694,79 +2441,7 @@ public:
kernel_set_args(ckPathTraceKernel_data_init,
0,
kgbuffer,
- sd,
sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow);
-
- /* Ray differentials. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow);
-
- /* Dp/Du */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow);
-
- /* Object motion. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow);
-
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
d_data,
per_sample_output_buffers,
d_rng_state,
@@ -3132,16 +2807,12 @@ public:
{
size_t total_invariable_mem_allocated = 0;
size_t KernelGlobals_size = 0;
- size_t ShaderData_SOA_size = 0;
KernelGlobals_size = get_KernelGlobals_size();
- ShaderData_SOA_size = get_shaderdata_soa_size();
total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_DL_shadow size */
return total_invariable_mem_allocated;
}
@@ -3208,13 +2879,11 @@ public:
/* Calculate the memory required for one thread in split kernel. */
size_t get_per_thread_memory()
{
- size_t shader_closure_size = 0;
- size_t shaderdata_volume = 0;
- shader_closure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = 0;
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- shaderdata_volume = get_shader_data_size(shader_closure_size);
+ shaderdata_size = get_shader_data_size(current_max_closure);
size_t retval = sizeof(RNG)
+ sizeof(float3) /* Throughput size */
+ sizeof(float) /* L transparent size */
@@ -3225,8 +2894,8 @@ public:
+ sizeof(Intersection) /* Overall isect */
+ sizeof(Intersection) /* Instersection_coop_AO */
+ sizeof(Intersection) /* Intersection coop DL */
- + shaderdata_volume /* Overall ShaderData */
- + (shaderdata_volume * 2) /* ShaderData : DL and shadow */
+ + shaderdata_size /* Overall ShaderData */
+ + (shaderdata_size * 2) /* ShaderData : DL and shadow */
+ sizeof(Ray) + sizeof(BsdfEval)
+ sizeof(float3) /* AOAlpha size */
+ sizeof(float3) /* AOBSDF size */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 5249416e838..b5b0b1f3fae 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -55,7 +55,6 @@ set(SRC_HEADERS
kernel_queues.h
kernel_random.h
kernel_shader.h
- kernel_shaderdata_vars.h
kernel_shadow.h
kernel_subsurface.h
kernel_textures.h
diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h
deleted file mode 100644
index 3f63f94912e..00000000000
--- a/intern/cycles/kernel/kernel_shaderdata_vars.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-* Copyright 2011-2015 Blender Foundation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#ifndef SD_VAR
-#define SD_VAR(type, what)
-#endif
-#ifndef SD_CLOSURE_VAR
-#define SD_CLOSURE_VAR(type, what, max_closure)
-#endif
-
-/* position */
-SD_VAR(float3, P)
-/* smooth normal for shading */
-SD_VAR(float3, N)
-/* true geometric normal */
-SD_VAR(float3, Ng)
-/* view/incoming direction */
-SD_VAR(float3, I)
-/* shader id */
-SD_VAR(int, shader)
-/* booleans describing shader, see ShaderDataFlag */
-SD_VAR(int, flag)
-
-/* primitive id if there is one, ~0 otherwise */
-SD_VAR(int, prim)
-
-/* combined type and curve segment for hair */
-SD_VAR(int, type)
-
-/* parametric coordinates
-* - barycentric weights for triangles */
-SD_VAR(float, u)
-SD_VAR(float, v)
-/* object id if there is one, ~0 otherwise */
-SD_VAR(int, object)
-
-/* motion blur sample time */
-SD_VAR(float, time)
-
-/* length of the ray being shaded */
-SD_VAR(float, ray_length)
-
-#ifdef __RAY_DIFFERENTIALS__
-/* differential of P. these are orthogonal to Ng, not N */
-SD_VAR(differential3, dP)
-/* differential of I */
-SD_VAR(differential3, dI)
-/* differential of u, v */
-SD_VAR(differential, du)
-SD_VAR(differential, dv)
-#endif
-#ifdef __DPDU__
-/* differential of P w.r.t. parametric coordinates. note that dPdu is
-* not readily suitable as a tangent for shading on triangles. */
-SD_VAR(float3, dPdu)
-SD_VAR(float3, dPdv)
-#endif
-
-#ifdef __OBJECT_MOTION__
-/* object <-> world space transformations, cached to avoid
-* re-interpolating them constantly for shading */
-SD_VAR(Transform, ob_tfm)
-SD_VAR(Transform, ob_itfm)
-#endif
-
-/* Closure data, we store a fixed array of closures */
-SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE)
-SD_VAR(int, num_closure)
-SD_VAR(float, randb_closure)
-
-/* ray start position, only set for backgrounds */
-SD_VAR(float3, ray_P)
-SD_VAR(differential3, ray_dP)
-
-#ifdef __OSL__
-SD_VAR(struct KernelGlobals *, osl_globals)
-SD_VAR(struct PathState *, osl_path_state)
-#endif
-
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index aefa5d5be85..3b1111e5069 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -201,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#endif
#ifdef __SPLIT_KERNEL__
- Intersection *isect = &kg->isect_shadow[TIDX];
+ Intersection *isect = &kg->isect_shadow[SD_THREAD];
#else
Intersection isect_object;
Intersection *isect = &isect_object;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index da2416bc09b..49d8859c7de 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -718,22 +718,99 @@ enum ShaderDataFlag {
struct KernelGlobals;
#ifdef __SPLIT_KERNEL__
-#define SD_VAR(type, what) ccl_global type *what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
-#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
-#define ccl_fetch(s, t) (s->t[TIDX])
-#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
+# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
+# if defined(__SPLIT_KERNEL_AOS__)
+ /* ShaderData is stored as an Array-of-Structures */
+# define ccl_fetch(s, t) (s[SD_THREAD].t)
+# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].t[index])
+# else
+ /* ShaderData is stored as an Structure-of-Arrays */
+# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
+# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
+# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
+# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(t) + SD_FIELD_SIZE(t) * SD_THREAD - SD_OFFSETOF(t)))->t)
+# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index])
+# endif
#else
-#define SD_VAR(type, what) type what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
-#define ccl_fetch(s, t) (s->t)
-#define ccl_fetch_array(s, t, index) (&s->t[index])
+# define ccl_fetch(s, t) (s->t)
+# define ccl_fetch_array(s, t, index) (&s->t[index])
#endif
typedef ccl_addr_space struct ShaderData {
+ /* position */
+ float3 P;
+ /* smooth normal for shading */
+ float3 N;
+ /* true geometric normal */
+ float3 Ng;
+ /* view/incoming direction */
+ float3 I;
+ /* shader id */
+ int shader;
+ /* booleans describing shader, see ShaderDataFlag */
+ int flag;
+
+ /* primitive id if there is one, ~0 otherwise */
+ int prim;
+
+ /* combined type and curve segment for hair */
+ int type;
+
+ /* parametric coordinates
+ * - barycentric weights for triangles */
+ float u;
+ float v;
+ /* object id if there is one, ~0 otherwise */
+ int object;
+
+ /* motion blur sample time */
+ float time;
+
+ /* length of the ray being shaded */
+ float ray_length;
-#include "kernel_shaderdata_vars.h"
+ /* ray bounce depth */
+ int ray_depth;
+ /* ray transparent depth */
+ int transparent_depth;
+
+#ifdef __RAY_DIFFERENTIALS__
+ /* differential of P. these are orthogonal to Ng, not N */
+ differential3 dP;
+ /* differential of I */
+ differential3 dI;
+ /* differential of u, v */
+ differential du;
+ differential dv;
+#endif
+#ifdef __DPDU__
+ /* differential of P w.r.t. parametric coordinates. note that dPdu is
+ * not readily suitable as a tangent for shading on triangles. */
+ float3 dPdu;
+ float3 dPdv;
+#endif
+
+#ifdef __OBJECT_MOTION__
+ /* object <-> world space transformations, cached to avoid
+ * re-interpolating them constantly for shading */
+ Transform ob_tfm;
+ Transform ob_itfm;
+#endif
+
+ /* Closure data, we store a fixed array of closures */
+ struct ShaderClosure closure[MAX_CLOSURE];
+ int num_closure;
+ float randb_closure;
+
+ /* ray start position, only set for backgrounds */
+ float3 ray_P;
+ differential3 ray_dP;
+
+#ifdef __OSL__
+ struct KernelGlobals * osl_globals;
+ struct PathState *osl_path_state;
+#endif
} ShaderData;
/* Path State */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 401c4467afa..18139687eab 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -17,91 +17,8 @@
#include "split/kernel_data_init.h"
__kernel void kernel_ocl_path_trace_data_init(
- ccl_global char *kg,
- ccl_global char *sd,
+ ccl_global char *globals,
ccl_global char *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -136,68 +53,8 @@ __kernel void kernel_ocl_path_trace_data_init(
#endif
int parallel_samples) /* Number of samples to be processed in parallel */
{
- kernel_data_init((KernelGlobals *)kg,
- (ShaderData *)sd,
+ kernel_data_init((KernelGlobals *)globals,
(ShaderData *)sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow,
-
- /* Dp/Du */
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow,
-
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
data,
per_sample_output_buffers,
rng_state,
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 6993a78a789..9891391a3a3 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -52,90 +52,7 @@
*/
ccl_device void kernel_data_init(
KernelGlobals *kg,
- ShaderData *sd,
ShaderData *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -177,89 +94,6 @@ ccl_device void kernel_data_init(
kg->name = name;
#include "../kernel_textures.h"
- sd->P = P_sd;
- sd_DL_shadow->P = P_sd_DL_shadow;
-
- sd->N = N_sd;
- sd_DL_shadow->N = N_sd_DL_shadow;
-
- sd->Ng = Ng_sd;
- sd_DL_shadow->Ng = Ng_sd_DL_shadow;
-
- sd->I = I_sd;
- sd_DL_shadow->I = I_sd_DL_shadow;
-
- sd->shader = shader_sd;
- sd_DL_shadow->shader = shader_sd_DL_shadow;
-
- sd->flag = flag_sd;
- sd_DL_shadow->flag = flag_sd_DL_shadow;
-
- sd->prim = prim_sd;
- sd_DL_shadow->prim = prim_sd_DL_shadow;
-
- sd->type = type_sd;
- sd_DL_shadow->type = type_sd_DL_shadow;
-
- sd->u = u_sd;
- sd_DL_shadow->u = u_sd_DL_shadow;
-
- sd->v = v_sd;
- sd_DL_shadow->v = v_sd_DL_shadow;
-
- sd->object = object_sd;
- sd_DL_shadow->object = object_sd_DL_shadow;
-
- sd->time = time_sd;
- sd_DL_shadow->time = time_sd_DL_shadow;
-
- sd->ray_length = ray_length_sd;
- sd_DL_shadow->ray_length = ray_length_sd_DL_shadow;
-
-#ifdef __RAY_DIFFERENTIALS__
- sd->dP = dP_sd;
- sd_DL_shadow->dP = dP_sd_DL_shadow;
-
- sd->dI = dI_sd;
- sd_DL_shadow->dI = dI_sd_DL_shadow;
-
- sd->du = du_sd;
- sd_DL_shadow->du = du_sd_DL_shadow;
-
- sd->dv = dv_sd;
- sd_DL_shadow->dv = dv_sd_DL_shadow;
-#ifdef __DPDU__
- sd->dPdu = dPdu_sd;
- sd_DL_shadow->dPdu = dPdu_sd_DL_shadow;
-
- sd->dPdv = dPdv_sd;
- sd_DL_shadow->dPdv = dPdv_sd_DL_shadow;
-#endif
-#endif
-
-#ifdef __OBJECT_MOTION__
- sd->ob_tfm = ob_tfm_sd;
- sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow;
-
- sd->ob_itfm = ob_itfm_sd;
- sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow;
-#endif
-
- sd->closure = closure_sd;
- sd_DL_shadow->closure = closure_sd_DL_shadow;
-
- sd->num_closure = num_closure_sd;
- sd_DL_shadow->num_closure = num_closure_sd_DL_shadow;
-
- sd->randb_closure = randb_closure_sd;
- sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow;
-
- sd->ray_P = ray_P_sd;
- sd_DL_shadow->ray_P = ray_P_sd_DL_shadow;
-
- sd->ray_dP = ray_dP_sd;
- sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow;
-
int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
#ifdef __WORK_STEALING__