Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-10-29 18:56:27 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-01-30 02:23:06 +0300
commit9815f8a623d47c9a52aac8ac3a2bcc17d1c74b5e (patch)
treec42a1bbdd4f070802932bce88c11be8c8732b930 /intern/cycles/kernel
parentfef53c74b5520fe6404d581a3c15fad4177f29b7 (diff)
Cycles: Cleanup of OpenCL split kernel routines
The idea is to switch from allocating separate buffers for shader data's structure of arrays to allocating one huge memory block and do some index trickery to make it accessed as SOA. This saves quite reasonable amount of lines of code in device_opencl and also makes it possible to get rid of special declaration of ShaderData structure. As a side effect it also makes it easier to experiment with SOA vs. AOS for split kernel. Works fine here on NVidia GTX580, Intel CPU amd AMD Fiji cards. Reviewers: #cycles, brecht, juicyfruit, dingto Differential Revision: https://developer.blender.org/D1593
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernel_shaderdata_vars.h94
-rw-r--r--intern/cycles/kernel/kernel_shadow.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h97
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl147
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h166
6 files changed, 90 insertions, 417 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 5249416e838..b5b0b1f3fae 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -55,7 +55,6 @@ set(SRC_HEADERS
kernel_queues.h
kernel_random.h
kernel_shader.h
- kernel_shaderdata_vars.h
kernel_shadow.h
kernel_subsurface.h
kernel_textures.h
diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h
deleted file mode 100644
index 3f63f94912e..00000000000
--- a/intern/cycles/kernel/kernel_shaderdata_vars.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-* Copyright 2011-2015 Blender Foundation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#ifndef SD_VAR
-#define SD_VAR(type, what)
-#endif
-#ifndef SD_CLOSURE_VAR
-#define SD_CLOSURE_VAR(type, what, max_closure)
-#endif
-
-/* position */
-SD_VAR(float3, P)
-/* smooth normal for shading */
-SD_VAR(float3, N)
-/* true geometric normal */
-SD_VAR(float3, Ng)
-/* view/incoming direction */
-SD_VAR(float3, I)
-/* shader id */
-SD_VAR(int, shader)
-/* booleans describing shader, see ShaderDataFlag */
-SD_VAR(int, flag)
-
-/* primitive id if there is one, ~0 otherwise */
-SD_VAR(int, prim)
-
-/* combined type and curve segment for hair */
-SD_VAR(int, type)
-
-/* parametric coordinates
-* - barycentric weights for triangles */
-SD_VAR(float, u)
-SD_VAR(float, v)
-/* object id if there is one, ~0 otherwise */
-SD_VAR(int, object)
-
-/* motion blur sample time */
-SD_VAR(float, time)
-
-/* length of the ray being shaded */
-SD_VAR(float, ray_length)
-
-#ifdef __RAY_DIFFERENTIALS__
-/* differential of P. these are orthogonal to Ng, not N */
-SD_VAR(differential3, dP)
-/* differential of I */
-SD_VAR(differential3, dI)
-/* differential of u, v */
-SD_VAR(differential, du)
-SD_VAR(differential, dv)
-#endif
-#ifdef __DPDU__
-/* differential of P w.r.t. parametric coordinates. note that dPdu is
-* not readily suitable as a tangent for shading on triangles. */
-SD_VAR(float3, dPdu)
-SD_VAR(float3, dPdv)
-#endif
-
-#ifdef __OBJECT_MOTION__
-/* object <-> world space transformations, cached to avoid
-* re-interpolating them constantly for shading */
-SD_VAR(Transform, ob_tfm)
-SD_VAR(Transform, ob_itfm)
-#endif
-
-/* Closure data, we store a fixed array of closures */
-SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE)
-SD_VAR(int, num_closure)
-SD_VAR(float, randb_closure)
-
-/* ray start position, only set for backgrounds */
-SD_VAR(float3, ray_P)
-SD_VAR(differential3, ray_dP)
-
-#ifdef __OSL__
-SD_VAR(struct KernelGlobals *, osl_globals)
-SD_VAR(struct PathState *, osl_path_state)
-#endif
-
-#undef SD_VAR
-#undef SD_CLOSURE_VAR
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index aefa5d5be85..3b1111e5069 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -201,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#endif
#ifdef __SPLIT_KERNEL__
- Intersection *isect = &kg->isect_shadow[TIDX];
+ Intersection *isect = &kg->isect_shadow[SD_THREAD];
#else
Intersection isect_object;
Intersection *isect = &isect_object;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index da2416bc09b..49d8859c7de 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -718,22 +718,99 @@ enum ShaderDataFlag {
struct KernelGlobals;
#ifdef __SPLIT_KERNEL__
-#define SD_VAR(type, what) ccl_global type *what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
-#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
-#define ccl_fetch(s, t) (s->t[TIDX])
-#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
+# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
+# if defined(__SPLIT_KERNEL_AOS__)
+ /* ShaderData is stored as an Array-of-Structures */
+# define ccl_fetch(s, t) (s[SD_THREAD].t)
+# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].t[index])
+# else
+ /* ShaderData is stored as an Structure-of-Arrays */
+# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
+# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
+# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
+# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(t) + SD_FIELD_SIZE(t) * SD_THREAD - SD_OFFSETOF(t)))->t)
+# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index])
+# endif
#else
-#define SD_VAR(type, what) type what;
-#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
-#define ccl_fetch(s, t) (s->t)
-#define ccl_fetch_array(s, t, index) (&s->t[index])
+# define ccl_fetch(s, t) (s->t)
+# define ccl_fetch_array(s, t, index) (&s->t[index])
#endif
typedef ccl_addr_space struct ShaderData {
+ /* position */
+ float3 P;
+ /* smooth normal for shading */
+ float3 N;
+ /* true geometric normal */
+ float3 Ng;
+ /* view/incoming direction */
+ float3 I;
+ /* shader id */
+ int shader;
+ /* booleans describing shader, see ShaderDataFlag */
+ int flag;
+
+ /* primitive id if there is one, ~0 otherwise */
+ int prim;
+
+ /* combined type and curve segment for hair */
+ int type;
+
+ /* parametric coordinates
+ * - barycentric weights for triangles */
+ float u;
+ float v;
+ /* object id if there is one, ~0 otherwise */
+ int object;
+
+ /* motion blur sample time */
+ float time;
+
+ /* length of the ray being shaded */
+ float ray_length;
-#include "kernel_shaderdata_vars.h"
+ /* ray bounce depth */
+ int ray_depth;
+ /* ray transparent depth */
+ int transparent_depth;
+
+#ifdef __RAY_DIFFERENTIALS__
+ /* differential of P. these are orthogonal to Ng, not N */
+ differential3 dP;
+ /* differential of I */
+ differential3 dI;
+ /* differential of u, v */
+ differential du;
+ differential dv;
+#endif
+#ifdef __DPDU__
+ /* differential of P w.r.t. parametric coordinates. note that dPdu is
+ * not readily suitable as a tangent for shading on triangles. */
+ float3 dPdu;
+ float3 dPdv;
+#endif
+
+#ifdef __OBJECT_MOTION__
+ /* object <-> world space transformations, cached to avoid
+ * re-interpolating them constantly for shading */
+ Transform ob_tfm;
+ Transform ob_itfm;
+#endif
+
+ /* Closure data, we store a fixed array of closures */
+ struct ShaderClosure closure[MAX_CLOSURE];
+ int num_closure;
+ float randb_closure;
+
+ /* ray start position, only set for backgrounds */
+ float3 ray_P;
+ differential3 ray_dP;
+
+#ifdef __OSL__
+ struct KernelGlobals * osl_globals;
+ struct PathState *osl_path_state;
+#endif
} ShaderData;
/* Path State */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 401c4467afa..18139687eab 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -17,91 +17,8 @@
#include "split/kernel_data_init.h"
__kernel void kernel_ocl_path_trace_data_init(
- ccl_global char *kg,
- ccl_global char *sd,
+ ccl_global char *globals,
ccl_global char *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -136,68 +53,8 @@ __kernel void kernel_ocl_path_trace_data_init(
#endif
int parallel_samples) /* Number of samples to be processed in parallel */
{
- kernel_data_init((KernelGlobals *)kg,
- (ShaderData *)sd,
+ kernel_data_init((KernelGlobals *)globals,
(ShaderData *)sd_DL_shadow,
- P_sd,
- P_sd_DL_shadow,
- N_sd,
- N_sd_DL_shadow,
- Ng_sd,
- Ng_sd_DL_shadow,
- I_sd,
- I_sd_DL_shadow,
- shader_sd,
- shader_sd_DL_shadow,
- flag_sd,
- flag_sd_DL_shadow,
- prim_sd,
- prim_sd_DL_shadow,
- type_sd,
- type_sd_DL_shadow,
- u_sd,
- u_sd_DL_shadow,
- v_sd,
- v_sd_DL_shadow,
- object_sd,
- object_sd_DL_shadow,
- time_sd,
- time_sd_DL_shadow,
- ray_length_sd,
- ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- dP_sd,
- dP_sd_DL_shadow,
- dI_sd,
- dI_sd_DL_shadow,
- du_sd,
- du_sd_DL_shadow,
- dv_sd,
- dv_sd_DL_shadow,
-
- /* Dp/Du */
- dPdu_sd,
- dPdu_sd_DL_shadow,
- dPdv_sd,
- dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ob_tfm_sd,
- ob_tfm_sd_DL_shadow,
- ob_itfm_sd,
- ob_itfm_sd_DL_shadow,
-
- closure_sd,
- closure_sd_DL_shadow,
- num_closure_sd,
- num_closure_sd_DL_shadow,
- randb_closure_sd,
- randb_closure_sd_DL_shadow,
- ray_P_sd,
- ray_P_sd_DL_shadow,
- ray_dP_sd,
- ray_dP_sd_DL_shadow,
data,
per_sample_output_buffers,
rng_state,
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 6993a78a789..9891391a3a3 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -52,90 +52,7 @@
*/
ccl_device void kernel_data_init(
KernelGlobals *kg,
- ShaderData *sd,
ShaderData *sd_DL_shadow,
-
- ccl_global float3 *P_sd,
- ccl_global float3 *P_sd_DL_shadow,
-
- ccl_global float3 *N_sd,
- ccl_global float3 *N_sd_DL_shadow,
-
- ccl_global float3 *Ng_sd,
- ccl_global float3 *Ng_sd_DL_shadow,
-
- ccl_global float3 *I_sd,
- ccl_global float3 *I_sd_DL_shadow,
-
- ccl_global int *shader_sd,
- ccl_global int *shader_sd_DL_shadow,
-
- ccl_global int *flag_sd,
- ccl_global int *flag_sd_DL_shadow,
-
- ccl_global int *prim_sd,
- ccl_global int *prim_sd_DL_shadow,
-
- ccl_global int *type_sd,
- ccl_global int *type_sd_DL_shadow,
-
- ccl_global float *u_sd,
- ccl_global float *u_sd_DL_shadow,
-
- ccl_global float *v_sd,
- ccl_global float *v_sd_DL_shadow,
-
- ccl_global int *object_sd,
- ccl_global int *object_sd_DL_shadow,
-
- ccl_global float *time_sd,
- ccl_global float *time_sd_DL_shadow,
-
- ccl_global float *ray_length_sd,
- ccl_global float *ray_length_sd_DL_shadow,
-
- /* Ray differentials. */
- ccl_global differential3 *dP_sd,
- ccl_global differential3 *dP_sd_DL_shadow,
-
- ccl_global differential3 *dI_sd,
- ccl_global differential3 *dI_sd_DL_shadow,
-
- ccl_global differential *du_sd,
- ccl_global differential *du_sd_DL_shadow,
-
- ccl_global differential *dv_sd,
- ccl_global differential *dv_sd_DL_shadow,
-
- /* Dp/Du */
- ccl_global float3 *dPdu_sd,
- ccl_global float3 *dPdu_sd_DL_shadow,
-
- ccl_global float3 *dPdv_sd,
- ccl_global float3 *dPdv_sd_DL_shadow,
-
- /* Object motion. */
- ccl_global Transform *ob_tfm_sd,
- ccl_global Transform *ob_tfm_sd_DL_shadow,
-
- ccl_global Transform *ob_itfm_sd,
- ccl_global Transform *ob_itfm_sd_DL_shadow,
-
- ShaderClosure *closure_sd,
- ShaderClosure *closure_sd_DL_shadow,
-
- ccl_global int *num_closure_sd,
- ccl_global int *num_closure_sd_DL_shadow,
-
- ccl_global float *randb_closure_sd,
- ccl_global float *randb_closure_sd_DL_shadow,
-
- ccl_global float3 *ray_P_sd,
- ccl_global float3 *ray_P_sd_DL_shadow,
-
- ccl_global differential3 *ray_dP_sd,
- ccl_global differential3 *ray_dP_sd_DL_shadow,
-
ccl_constant KernelData *data,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
@@ -177,89 +94,6 @@ ccl_device void kernel_data_init(
kg->name = name;
#include "../kernel_textures.h"
- sd->P = P_sd;
- sd_DL_shadow->P = P_sd_DL_shadow;
-
- sd->N = N_sd;
- sd_DL_shadow->N = N_sd_DL_shadow;
-
- sd->Ng = Ng_sd;
- sd_DL_shadow->Ng = Ng_sd_DL_shadow;
-
- sd->I = I_sd;
- sd_DL_shadow->I = I_sd_DL_shadow;
-
- sd->shader = shader_sd;
- sd_DL_shadow->shader = shader_sd_DL_shadow;
-
- sd->flag = flag_sd;
- sd_DL_shadow->flag = flag_sd_DL_shadow;
-
- sd->prim = prim_sd;
- sd_DL_shadow->prim = prim_sd_DL_shadow;
-
- sd->type = type_sd;
- sd_DL_shadow->type = type_sd_DL_shadow;
-
- sd->u = u_sd;
- sd_DL_shadow->u = u_sd_DL_shadow;
-
- sd->v = v_sd;
- sd_DL_shadow->v = v_sd_DL_shadow;
-
- sd->object = object_sd;
- sd_DL_shadow->object = object_sd_DL_shadow;
-
- sd->time = time_sd;
- sd_DL_shadow->time = time_sd_DL_shadow;
-
- sd->ray_length = ray_length_sd;
- sd_DL_shadow->ray_length = ray_length_sd_DL_shadow;
-
-#ifdef __RAY_DIFFERENTIALS__
- sd->dP = dP_sd;
- sd_DL_shadow->dP = dP_sd_DL_shadow;
-
- sd->dI = dI_sd;
- sd_DL_shadow->dI = dI_sd_DL_shadow;
-
- sd->du = du_sd;
- sd_DL_shadow->du = du_sd_DL_shadow;
-
- sd->dv = dv_sd;
- sd_DL_shadow->dv = dv_sd_DL_shadow;
-#ifdef __DPDU__
- sd->dPdu = dPdu_sd;
- sd_DL_shadow->dPdu = dPdu_sd_DL_shadow;
-
- sd->dPdv = dPdv_sd;
- sd_DL_shadow->dPdv = dPdv_sd_DL_shadow;
-#endif
-#endif
-
-#ifdef __OBJECT_MOTION__
- sd->ob_tfm = ob_tfm_sd;
- sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow;
-
- sd->ob_itfm = ob_itfm_sd;
- sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow;
-#endif
-
- sd->closure = closure_sd;
- sd_DL_shadow->closure = closure_sd_DL_shadow;
-
- sd->num_closure = num_closure_sd;
- sd_DL_shadow->num_closure = num_closure_sd_DL_shadow;
-
- sd->randb_closure = randb_closure_sd;
- sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow;
-
- sd->ray_P = ray_P_sd;
- sd_DL_shadow->ray_P = ray_P_sd_DL_shadow;
-
- sd->ray_dP = ray_dP_sd;
- sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow;
-
int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
#ifdef __WORK_STEALING__