Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_opencl.cpp351
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernel_shaderdata_vars.h94
-rw-r--r--intern/cycles/kernel/kernel_shadow.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h97
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl147
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h166
7 files changed, 100 insertions, 758 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index e612b9f581f..c41da549c61 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1908,59 +1908,6 @@ public:
* shadow_blocked kernel.
*/
- /* Global buffers of each member of ShaderData. */
- cl_mem P_sd;
- cl_mem P_sd_DL_shadow;
- cl_mem N_sd;
- cl_mem N_sd_DL_shadow;
- cl_mem Ng_sd;
- cl_mem Ng_sd_DL_shadow;
- cl_mem I_sd;
- cl_mem I_sd_DL_shadow;
- cl_mem shader_sd;
- cl_mem shader_sd_DL_shadow;
- cl_mem flag_sd;
- cl_mem flag_sd_DL_shadow;
- cl_mem prim_sd;
- cl_mem prim_sd_DL_shadow;
- cl_mem type_sd;
- cl_mem type_sd_DL_shadow;
- cl_mem u_sd;
- cl_mem u_sd_DL_shadow;
- cl_mem v_sd;
- cl_mem v_sd_DL_shadow;
- cl_mem object_sd;
- cl_mem object_sd_DL_shadow;
- cl_mem time_sd;
- cl_mem time_sd_DL_shadow;
- cl_mem ray_length_sd;
- cl_mem ray_length_sd_DL_shadow;
-
- /* Ray differentials. */
- cl_mem dP_sd, dI_sd;
- cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
- cl_mem du_sd, dv_sd;
- cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
-
- /* Dp/Du */
- cl_mem dPdu_sd, dPdv_sd;
- cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
-
- /* Object motion. */
- cl_mem ob_tfm_sd, ob_itfm_sd;
- cl_mem ob_tfm_sd_DL_shadow, ob_itfm_sd_DL_shadow;
-
- cl_mem closure_sd;
- cl_mem closure_sd_DL_shadow;
- cl_mem num_closure_sd;
- cl_mem num_closure_sd_DL_shadow;
- cl_mem randb_closure_sd;
- cl_mem randb_closure_sd_DL_shadow;
- cl_mem ray_P_sd;
- cl_mem ray_P_sd_DL_shadow;
- cl_mem ray_dP_sd;
- cl_mem ray_dP_sd_DL_shadow;
-
/* Global memory required for shadow blocked and accum_radiance. */
cl_mem BSDFEval_coop;
cl_mem ISLamp_coop;
@@ -2057,66 +2004,6 @@ public:
sd = NULL;
sd_DL_shadow = NULL;
- P_sd = NULL;
- P_sd_DL_shadow = NULL;
- N_sd = NULL;
- N_sd_DL_shadow = NULL;
- Ng_sd = NULL;
- Ng_sd_DL_shadow = NULL;
- I_sd = NULL;
- I_sd_DL_shadow = NULL;
- shader_sd = NULL;
- shader_sd_DL_shadow = NULL;
- flag_sd = NULL;
- flag_sd_DL_shadow = NULL;
- prim_sd = NULL;
- prim_sd_DL_shadow = NULL;
- type_sd = NULL;
- type_sd_DL_shadow = NULL;
- u_sd = NULL;
- u_sd_DL_shadow = NULL;
- v_sd = NULL;
- v_sd_DL_shadow = NULL;
- object_sd = NULL;
- object_sd_DL_shadow = NULL;
- time_sd = NULL;
- time_sd_DL_shadow = NULL;
- ray_length_sd = NULL;
- ray_length_sd_DL_shadow = NULL;
-
- /* Ray differentials. */
- dP_sd = NULL;
- dI_sd = NULL;
- dP_sd_DL_shadow = NULL;
- dI_sd_DL_shadow = NULL;
- du_sd = NULL;
- dv_sd = NULL;
- du_sd_DL_shadow = NULL;
- dv_sd_DL_shadow = NULL;
-
- /* Dp/Du */
- dPdu_sd = NULL;
- dPdv_sd = NULL;
- dPdu_sd_DL_shadow = NULL;
- dPdv_sd_DL_shadow = NULL;
-
- /* Object motion. */
- ob_tfm_sd = NULL;
- ob_itfm_sd = NULL;
- ob_tfm_sd_DL_shadow = NULL;
- ob_itfm_sd_DL_shadow = NULL;
-
- closure_sd = NULL;
- closure_sd_DL_shadow = NULL;
- num_closure_sd = NULL;
- num_closure_sd_DL_shadow = NULL;
- randb_closure_sd = NULL;
- randb_closure_sd_DL_shadow = NULL;
- ray_P_sd = NULL;
- ray_P_sd_DL_shadow = NULL;
- ray_dP_sd = NULL;
- ray_dP_sd_DL_shadow = NULL;
-
rng_coop = NULL;
throughput_coop = NULL;
L_transparent_coop = NULL;
@@ -2232,17 +2119,10 @@ public:
return ret_size;
}
- size_t get_shader_closure_size(int max_closure)
- {
- return (sizeof(ShaderClosure) * max_closure);
- }
-
- size_t get_shader_data_size(size_t shader_closure_size)
+ size_t get_shader_data_size(size_t max_closure)
{
- /* ShaderData size without accounting for ShaderClosure array. */
- size_t shader_data_size =
- sizeof(ShaderData) - (sizeof(ShaderClosure) * MAX_CLOSURE);
- return (shader_data_size + shader_closure_size);
+ /* ShaderData size with variable size ShaderClosure array */
+ return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure));
}
/* Returns size of KernelGlobals structure associated with OpenCL. */
@@ -2264,20 +2144,6 @@ public:
return sizeof(KernelGlobals);
}
- /* Returns size of Structure of arrays implementation of. */
- size_t get_shaderdata_soa_size()
- {
- size_t shader_soa_size = 0;
-
-#define SD_VAR(type, what) shader_soa_size += sizeof(void *);
-#define SD_CLOSURE_VAR(type, what, max_closure) shader_soa_size += sizeof(void *);
- #include "kernel_shaderdata_vars.h"
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
-
- return shader_soa_size;
- }
-
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
/* Get Shader, bake and film_convert kernels.
@@ -2398,66 +2264,6 @@ public:
release_kernel_safe(ckPathTraceKernel_sum_all_radiance);
/* Release global memory */
- release_mem_object_safe(P_sd);
- release_mem_object_safe(P_sd_DL_shadow);
- release_mem_object_safe(N_sd);
- release_mem_object_safe(N_sd_DL_shadow);
- release_mem_object_safe(Ng_sd);
- release_mem_object_safe(Ng_sd_DL_shadow);
- release_mem_object_safe(I_sd);
- release_mem_object_safe(I_sd_DL_shadow);
- release_mem_object_safe(shader_sd);
- release_mem_object_safe(shader_sd_DL_shadow);
- release_mem_object_safe(flag_sd);
- release_mem_object_safe(flag_sd_DL_shadow);
- release_mem_object_safe(prim_sd);
- release_mem_object_safe(prim_sd_DL_shadow);
- release_mem_object_safe(type_sd);
- release_mem_object_safe(type_sd_DL_shadow);
- release_mem_object_safe(u_sd);
- release_mem_object_safe(u_sd_DL_shadow);
- release_mem_object_safe(v_sd);
- release_mem_object_safe(v_sd_DL_shadow);
- release_mem_object_safe(object_sd);
- release_mem_object_safe(object_sd_DL_shadow);
- release_mem_object_safe(time_sd);
- release_mem_object_safe(time_sd_DL_shadow);
- release_mem_object_safe(ray_length_sd);
- release_mem_object_safe(ray_length_sd_DL_shadow);
-
- /* Ray differentials. */
- release_mem_object_safe(dP_sd);
- release_mem_object_safe(dP_sd_DL_shadow);
- release_mem_object_safe(dI_sd);
- release_mem_object_safe(dI_sd_DL_shadow);
- release_mem_object_safe(du_sd);
- release_mem_object_safe(du_sd_DL_shadow);
- release_mem_object_safe(dv_sd);
- release_mem_object_safe(dv_sd_DL_shadow);
-
- /* Dp/Du */
- release_mem_object_safe(dPdu_sd);
- release_mem_object_safe(dPdu_sd_DL_shadow);
- release_mem_object_safe(dPdv_sd);
- release_mem_object_safe(dPdv_sd_DL_shadow);
-
- /* Object motion. */
- release_mem_object_safe(ob_tfm_sd);
- release_mem_object_safe(ob_itfm_sd);
-
- release_mem_object_safe(ob_tfm_sd_DL_shadow);
- release_mem_object_safe(ob_itfm_sd_DL_shadow);
-
- release_mem_object_safe(closure_sd);
- release_mem_object_safe(closure_sd_DL_shadow);
- release_mem_object_safe(num_closure_sd);
- release_mem_object_safe(num_closure_sd_DL_shadow);
- release_mem_object_safe(randb_closure_sd);
- release_mem_object_safe(randb_closure_sd_DL_shadow);
- release_mem_object_safe(ray_P_sd);
- release_mem_object_safe(ray_P_sd_DL_shadow);
- release_mem_object_safe(ray_dP_sd);
- release_mem_object_safe(ray_dP_sd_DL_shadow);
release_mem_object_safe(rng_coop);
release_mem_object_safe(throughput_coop);
release_mem_object_safe(L_transparent_coop);
@@ -2572,7 +2378,7 @@ public:
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- size_t ShaderClosure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = get_shader_data_size(current_max_closure);
#ifdef __WORK_STEALING__
/* Calculate max groups */
@@ -2593,67 +2399,8 @@ public:
kgbuffer = mem_alloc(get_KernelGlobals_size());
/* Create global buffers for ShaderData. */
- sd = mem_alloc(get_shaderdata_soa_size());
- sd_DL_shadow = mem_alloc(get_shaderdata_soa_size());
- P_sd = mem_alloc(num_global_elements * sizeof(float3));
- P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- N_sd = mem_alloc(num_global_elements * sizeof(float3));
- N_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- Ng_sd = mem_alloc(num_global_elements * sizeof(float3));
- Ng_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- I_sd = mem_alloc(num_global_elements * sizeof(float3));
- I_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- shader_sd = mem_alloc(num_global_elements * sizeof(int));
- shader_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- flag_sd = mem_alloc(num_global_elements * sizeof(int));
- flag_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- prim_sd = mem_alloc(num_global_elements * sizeof(int));
- prim_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- type_sd = mem_alloc(num_global_elements * sizeof(int));
- type_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- u_sd = mem_alloc(num_global_elements * sizeof(float));
- u_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- v_sd = mem_alloc(num_global_elements * sizeof(float));
- v_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- object_sd = mem_alloc(num_global_elements * sizeof(int));
- object_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- time_sd = mem_alloc(num_global_elements * sizeof(float));
- time_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_length_sd = mem_alloc(num_global_elements * sizeof(float));
- ray_length_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
-
- /* Ray differentials. */
- dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- dI_sd = mem_alloc(num_global_elements * sizeof(differential3));
- dI_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
- du_sd = mem_alloc(num_global_elements * sizeof(differential));
- du_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
- dv_sd = mem_alloc(num_global_elements * sizeof(differential));
- dv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential));
-
- /* Dp/Du */
- dPdu_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdu_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- dPdv_sd = mem_alloc(num_global_elements * sizeof(float3));
- dPdv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
-
- /* Object motion. */
- ob_tfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_tfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
- ob_itfm_sd = mem_alloc(num_global_elements * sizeof(Transform));
- ob_itfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform));
-
- closure_sd = mem_alloc(num_global_elements * ShaderClosure_size);
- closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * ShaderClosure_size);
- num_closure_sd = mem_alloc(num_global_elements * sizeof(int));
- num_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int));
- randb_closure_sd = mem_alloc(num_global_elements * sizeof(float));
- randb_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float));
- ray_P_sd = mem_alloc(num_global_elements * sizeof(float3));
- ray_P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3));
- ray_dP_sd = mem_alloc(num_global_elements * sizeof(differential3));
- ray_dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3));
+ sd = mem_alloc(num_global_elements * shaderdata_size);
+ sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size);
/* Creation of global memory buffers which are shared among
* the kernels.
@@ -2694,79 +2441,7 @@ public:
kernel_set_args(ckPathTraceKernel_data_init,
0,
kgbuffer,
- sd,
sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow);
-
- /* Ray differentials. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow);
-
- /* Dp/Du */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow);
-
- /* Object motion. */
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow);
-
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
d_data,
per_sample_output_buffers,
d_rng_state,
@@ -3132,16 +2807,12 @@ public:
{
size_t total_invariable_mem_allocated = 0;
size_t KernelGlobals_size = 0;
- size_t ShaderData_SOA_size = 0;
KernelGlobals_size = get_KernelGlobals_size();
- ShaderData_SOA_size = get_shaderdata_soa_size();
total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */
- total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_DL_shadow size */
return total_invariable_mem_allocated;
}
@@ -3208,13 +2879,11 @@ public:
/* Calculate the memory required for one thread in split kernel. */
size_t get_per_thread_memory()
{
- size_t shader_closure_size = 0;
- size_t shaderdata_volume = 0;
- shader_closure_size = get_shader_closure_size(current_max_closure);
+ size_t shaderdata_size = 0;
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- shaderdata_volume = get_shader_data_size(shader_closure_size);
+ shaderdata_size = get_shader_data_size(current_max_closure);
size_t retval = sizeof(RNG)
+ sizeof(float3) /* Throughput size */
+ sizeof(float) /* L transparent size */
@@ -3225,8 +2894,8 @@ public:
+ sizeof(Intersection) /* Overall isect */
+ sizeof(Intersection) /* Instersection_coop_AO */
+ sizeof(Intersection) /* Intersection coop DL */
- + shaderdata_volume /* Overall ShaderData */
- + (shaderdata_volume * 2) /* ShaderData : DL and shadow */
+ + shaderdata_size /* Overall ShaderData */
+ + (shaderdata_size * 2) /* ShaderData : DL and shadow */
+ sizeof(Ray) + sizeof(BsdfEval)
+ sizeof(float3) /* AOAlpha size */
+ sizeof(float3) /* AOBSDF size */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 5249416e838..b5b0b1f3fae 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -55,7 +55,6 @@ set(SRC_HEADERS
kernel_queues.h
kernel_random.h
kernel_shader.h
- kernel_shaderdata_vars.h
kernel_shadow.h
kernel_subsurface.h
kernel_textures.h
diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h
deleted file mode 100644
index 3f63f94912e..00000000000
--- a/intern/cycles/kernel/kernel_shaderdata_vars.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-* Copyright 2011-2015 Blender Foundation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#ifndef SD_VAR
-#define SD_VAR(type, what)
-#endif
-#ifndef SD_CLOSURE_VAR
-#define SD_CLOSURE_VAR(type, what, max_closure)
-#endif
-
-/* position */
-SD_VAR(float3, P)
-/* smooth normal for shading */
-SD_VAR(float3, N)
-/* true geometric normal */
-SD_VAR(float3, Ng)
-/* view/incoming direction */
-SD_VAR(float3, I)
-/* shader id */
-SD_VAR(int, shader)
-/* booleans describing shader, see ShaderDataFlag */
-SD_VAR(int, flag)
-
-/* primitive id if there is one, ~0 otherwise */
-SD_VAR(int, prim)
-
-/* combined type and curve segment for hair */
-SD_VAR(int, type)
-
-/* parametric coordinates
-* - barycentric weights for triangles */
-SD_VAR(float, u)
-SD_VAR(float, v)
-/* object id if there is one, ~0 otherwise */
-SD_VAR(int, object)
-
-/* motion blur sample time */
-SD_VAR(float, time)
-
-/* length of the ray being shaded */
-SD_VAR(float, ray_length)
-
-#ifdef __RAY_DIFFERENTIALS__
-/* differential of P. these are orthogonal to Ng, not N */
-SD_VAR(differential3, dP)
-/* differential of I */
-SD_VAR(differential3, dI)
-/* differential of u, v */
-SD_VAR(differential, du)
-SD_VAR(differential, dv)
-#endif
-#ifdef __DPDU__
-/* differential of P w.r.t. parametric coordinates. note that dPdu is
-* not readily suitable as a tangent for shading on triangles. */
-SD_VAR(float3, dPdu)
-SD_VAR(float3, dPdv)
-#endif
-
-#ifdef __OBJECT_MOTION__
-/* object <-> world space transformations, cached to avoid
-* re-interpolating them constantly for shading */
-SD_VAR(Transform, ob_tfm)
-SD_VAR(Transform, ob_itfm)
-#endif
-
-/* Closure data, we store a fixed array of closures */
-SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE)
-SD_VAR(int, num_closure)
-SD_VAR(float, randb_closure)
-
-/* ray start position, only set for backgrounds */
-SD_VAR(float3, ray_P)
-SD_VAR(differential3, ray_dP)
-
-#ifdef __OSL__
-SD_VAR(struct KernelGlobals *, osl_globals)
-SD_VAR(struct PathState *, osl_path_state)
-#endif
-
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index aefa5d5be85..3b1111e5069 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -201,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#endif
#ifdef __SPLIT_KERNEL__
- Intersection *isect = &kg->isect_shadow[TIDX];
+ Intersection *isect = &kg->isect_shadow[SD_THREAD];
#else
Intersection isect_object;
Intersection *isect = &isect_object;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index da2416bc09b..49d8859c7de 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -718,22 +718,99 @@ enum ShaderDataFlag {
struct KernelGlobals;
#ifdef __SPLIT_KERNEL__
-#define SD_VAR(type, what) ccl_global type *what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
-#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
-#define ccl_fetch(s, t) (s->t[TIDX])
-#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
+# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
+# if defined(__SPLIT_KERNEL_AOS__)
+ /* ShaderData is stored as an Array-of-Structures */
+# define ccl_fetch(s, t) (s[SD_THREAD].t)
+# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].t[index])
+# else
+ /* ShaderData is stored as an Structure-of-Arrays */
+# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
+# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
+# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
+# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(t) + SD_FIELD_SIZE(t) * SD_THREAD - SD_OFFSETOF(t)))->t)
+# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index])
+# endif
#else
-#define SD_VAR(type, what) type what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
-#define ccl_fetch(s, t) (s->t)
-#define ccl_fetch_array(s, t, index) (&s->t[index])
+# define ccl_fetch(s, t) (s->t)
+# define ccl_fetch_array(s, t, index) (&s->t[index])
#endif
typedef ccl_addr_space struct ShaderData {
+ /* position */
+ float3 P;
+ /* smooth normal for shading */
+ float3 N;
+ /* true geometric normal */
+ float3 Ng;
+ /* view/incoming direction */
+ float3 I;
+ /* shader id */
+ int shader;
+ /* booleans describing shader, see ShaderDataFlag */
+ int flag;
+
+ /* primitive id if there is one, ~0 otherwise */
+ int prim;
+
+ /* combined type and curve segment for hair */
+ int type;
+
+ /* parametric coordinates
+ * - barycentric weights for triangles */
+ float u;
+ float v;
+ /* object id if there is one, ~0 otherwise */
+ int object;
+
+ /* motion blur sample time */
+ float time;
+
+ /* length of the ray being shaded */
+ float ray_length;
-#include "kernel_shaderdata_vars.h"
+ /* ray bounce depth */
+ int ray_depth;
+ /* ray transparent depth */
+ int transparent_depth;
+
+#ifdef __RAY_DIFFERENTIALS__
+ /* differential of P. these are orthogonal to Ng, not N */
+ differential3 dP;
+ /* differential of I */
+ differential3 dI;
+ /* differential of u, v */
+ differential du;
+ differential dv;
+#endif
+#ifdef __DPDU__
+ /* differential of P w.r.t. parametric coordinates. note that dPdu is
+ * not readily suitable as a tangent for shading on triangles. */
+ float3 dPdu;
+ float3 dPdv;
+#endif
+
+#ifdef __OBJECT_MOTION__
+ /* object <-> world space transformations, cached to avoid
+ * re-interpolating them constantly for shading */
+ Transform ob_tfm;
+ Transform ob_itfm;
+#endif
+
+ /* Closure data, we store a fixed array of closures */
+ struct ShaderClosure closure[MAX_CLOSURE];
+ int num_closure;
+ float randb_closure;
+
+ /* ray start position, only set for backgrounds */
+ float3 ray_P;
+ differential3 ray_dP;
+
+#ifdef __OSL__
+ struct KernelGlobals * osl_globals;
+ struct PathState *osl_path_state;
+#endif
} ShaderData;
/* Path State */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 401c4467afa..18139687eab 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -17,91 +17,8 @@
#include "split/kernel_data_init.h"
__kernel void kernel_ocl_path_trace_data_init(
- ccl_global char *kg,
- ccl_global char *sd,
+ ccl_global char *globals,
ccl_global char *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -136,68 +53,8 @@ __kernel void kernel_ocl_path_trace_data_init(
#endif
int parallel_samples) /* Number of samples to be processed in parallel */
{
- kernel_data_init((KernelGlobals *)kg,
- (ShaderData *)sd,
+ kernel_data_init((KernelGlobals *)globals,
(ShaderData *)sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow,
-
- /* Dp/Du */
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow,
-
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
data,
per_sample_output_buffers,
rng_state,
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 6993a78a789..9891391a3a3 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -52,90 +52,7 @@
*/
ccl_device void kernel_data_init(
KernelGlobals *kg,
- ShaderData *sd,
ShaderData *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -177,89 +94,6 @@ ccl_device void kernel_data_init(
kg->name = name;
#include "../kernel_textures.h"
- sd->P = P_sd;
- sd_DL_shadow->P = P_sd_DL_shadow;
-
- sd->N = N_sd;
- sd_DL_shadow->N = N_sd_DL_shadow;
-
- sd->Ng = Ng_sd;
- sd_DL_shadow->Ng = Ng_sd_DL_shadow;
-
- sd->I = I_sd;
- sd_DL_shadow->I = I_sd_DL_shadow;
-
- sd->shader = shader_sd;
- sd_DL_shadow->shader = shader_sd_DL_shadow;
-
- sd->flag = flag_sd;
- sd_DL_shadow->flag = flag_sd_DL_shadow;
-
- sd->prim = prim_sd;
- sd_DL_shadow->prim = prim_sd_DL_shadow;
-
- sd->type = type_sd;
- sd_DL_shadow->type = type_sd_DL_shadow;
-
- sd->u = u_sd;
- sd_DL_shadow->u = u_sd_DL_shadow;
-
- sd->v = v_sd;
- sd_DL_shadow->v = v_sd_DL_shadow;
-
- sd->object = object_sd;
- sd_DL_shadow->object = object_sd_DL_shadow;
-
- sd->time = time_sd;
- sd_DL_shadow->time = time_sd_DL_shadow;
-
- sd->ray_length = ray_length_sd;
- sd_DL_shadow->ray_length = ray_length_sd_DL_shadow;
-
-#ifdef __RAY_DIFFERENTIALS__
- sd->dP = dP_sd;
- sd_DL_shadow->dP = dP_sd_DL_shadow;
-
- sd->dI = dI_sd;
- sd_DL_shadow->dI = dI_sd_DL_shadow;
-
- sd->du = du_sd;
- sd_DL_shadow->du = du_sd_DL_shadow;
-
- sd->dv = dv_sd;
- sd_DL_shadow->dv = dv_sd_DL_shadow;
-#ifdef __DPDU__
- sd->dPdu = dPdu_sd;
- sd_DL_shadow->dPdu = dPdu_sd_DL_shadow;
-
- sd->dPdv = dPdv_sd;
- sd_DL_shadow->dPdv = dPdv_sd_DL_shadow;
-#endif
-#endif
-
-#ifdef __OBJECT_MOTION__
- sd->ob_tfm = ob_tfm_sd;
- sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow;
-
- sd->ob_itfm = ob_itfm_sd;
- sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow;
-#endif
-
- sd->closure = closure_sd;
- sd_DL_shadow->closure = closure_sd_DL_shadow;
-
- sd->num_closure = num_closure_sd;
- sd_DL_shadow->num_closure = num_closure_sd_DL_shadow;
-
- sd->randb_closure = randb_closure_sd;
- sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow;
-
- sd->ray_P = ray_P_sd;
- sd_DL_shadow->ray_P = ray_P_sd_DL_shadow;
-
- sd->ray_dP = ray_dP_sd;
- sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow;
-
int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
#ifdef __WORK_STEALING__