diff options
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 351 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shaderdata_vars.h | 94 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 97 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_data_init.cl | 147 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_data_init.h | 166 |
7 files changed, 100 insertions, 758 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index e612b9f581f..c41da549c61 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -1908,59 +1908,6 @@ public: * shadow_blocked kernel. */ - /* Global buffers of each member of ShaderData. */ - cl_mem P_sd; - cl_mem P_sd_DL_shadow; - cl_mem N_sd; - cl_mem N_sd_DL_shadow; - cl_mem Ng_sd; - cl_mem Ng_sd_DL_shadow; - cl_mem I_sd; - cl_mem I_sd_DL_shadow; - cl_mem shader_sd; - cl_mem shader_sd_DL_shadow; - cl_mem flag_sd; - cl_mem flag_sd_DL_shadow; - cl_mem prim_sd; - cl_mem prim_sd_DL_shadow; - cl_mem type_sd; - cl_mem type_sd_DL_shadow; - cl_mem u_sd; - cl_mem u_sd_DL_shadow; - cl_mem v_sd; - cl_mem v_sd_DL_shadow; - cl_mem object_sd; - cl_mem object_sd_DL_shadow; - cl_mem time_sd; - cl_mem time_sd_DL_shadow; - cl_mem ray_length_sd; - cl_mem ray_length_sd_DL_shadow; - - /* Ray differentials. */ - cl_mem dP_sd, dI_sd; - cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow; - cl_mem du_sd, dv_sd; - cl_mem du_sd_DL_shadow, dv_sd_DL_shadow; - - /* Dp/Du */ - cl_mem dPdu_sd, dPdv_sd; - cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow; - - /* Object motion. */ - cl_mem ob_tfm_sd, ob_itfm_sd; - cl_mem ob_tfm_sd_DL_shadow, ob_itfm_sd_DL_shadow; - - cl_mem closure_sd; - cl_mem closure_sd_DL_shadow; - cl_mem num_closure_sd; - cl_mem num_closure_sd_DL_shadow; - cl_mem randb_closure_sd; - cl_mem randb_closure_sd_DL_shadow; - cl_mem ray_P_sd; - cl_mem ray_P_sd_DL_shadow; - cl_mem ray_dP_sd; - cl_mem ray_dP_sd_DL_shadow; - /* Global memory required for shadow blocked and accum_radiance. */ cl_mem BSDFEval_coop; cl_mem ISLamp_coop; @@ -2057,66 +2004,6 @@ public: sd = NULL; sd_DL_shadow = NULL; - P_sd = NULL; - P_sd_DL_shadow = NULL; - N_sd = NULL; - N_sd_DL_shadow = NULL; - Ng_sd = NULL; - Ng_sd_DL_shadow = NULL; - I_sd = NULL; - I_sd_DL_shadow = NULL; - shader_sd = NULL; - shader_sd_DL_shadow = NULL; - flag_sd = NULL; - flag_sd_DL_shadow = NULL; - prim_sd = NULL; - prim_sd_DL_shadow = NULL; - type_sd = NULL; - type_sd_DL_shadow = NULL; - u_sd = NULL; - u_sd_DL_shadow = NULL; - v_sd = NULL; - v_sd_DL_shadow = NULL; - object_sd = NULL; - object_sd_DL_shadow = NULL; - time_sd = NULL; - time_sd_DL_shadow = NULL; - ray_length_sd = NULL; - ray_length_sd_DL_shadow = NULL; - - /* Ray differentials. */ - dP_sd = NULL; - dI_sd = NULL; - dP_sd_DL_shadow = NULL; - dI_sd_DL_shadow = NULL; - du_sd = NULL; - dv_sd = NULL; - du_sd_DL_shadow = NULL; - dv_sd_DL_shadow = NULL; - - /* Dp/Du */ - dPdu_sd = NULL; - dPdv_sd = NULL; - dPdu_sd_DL_shadow = NULL; - dPdv_sd_DL_shadow = NULL; - - /* Object motion. */ - ob_tfm_sd = NULL; - ob_itfm_sd = NULL; - ob_tfm_sd_DL_shadow = NULL; - ob_itfm_sd_DL_shadow = NULL; - - closure_sd = NULL; - closure_sd_DL_shadow = NULL; - num_closure_sd = NULL; - num_closure_sd_DL_shadow = NULL; - randb_closure_sd = NULL; - randb_closure_sd_DL_shadow = NULL; - ray_P_sd = NULL; - ray_P_sd_DL_shadow = NULL; - ray_dP_sd = NULL; - ray_dP_sd_DL_shadow = NULL; - rng_coop = NULL; throughput_coop = NULL; L_transparent_coop = NULL; @@ -2232,17 +2119,10 @@ public: return ret_size; } - size_t get_shader_closure_size(int max_closure) - { - return (sizeof(ShaderClosure) * max_closure); - } - - size_t get_shader_data_size(size_t shader_closure_size) + size_t get_shader_data_size(size_t max_closure) { - /* ShaderData size without accounting for ShaderClosure array. */ - size_t shader_data_size = - sizeof(ShaderData) - (sizeof(ShaderClosure) * MAX_CLOSURE); - return (shader_data_size + shader_closure_size); + /* ShaderData size with variable size ShaderClosure array */ + return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure)); } /* Returns size of KernelGlobals structure associated with OpenCL. */ @@ -2264,20 +2144,6 @@ public: return sizeof(KernelGlobals); } - /* Returns size of Structure of arrays implementation of. */ - size_t get_shaderdata_soa_size() - { - size_t shader_soa_size = 0; - -#define SD_VAR(type, what) shader_soa_size += sizeof(void *); -#define SD_CLOSURE_VAR(type, what, max_closure) shader_soa_size += sizeof(void *); - #include "kernel_shaderdata_vars.h" -#undef SD_VAR -#undef SD_CLOSURE_VAR - - return shader_soa_size; - } - bool load_kernels(const DeviceRequestedFeatures& requested_features) { /* Get Shader, bake and film_convert kernels. @@ -2398,66 +2264,6 @@ public: release_kernel_safe(ckPathTraceKernel_sum_all_radiance); /* Release global memory */ - release_mem_object_safe(P_sd); - release_mem_object_safe(P_sd_DL_shadow); - release_mem_object_safe(N_sd); - release_mem_object_safe(N_sd_DL_shadow); - release_mem_object_safe(Ng_sd); - release_mem_object_safe(Ng_sd_DL_shadow); - release_mem_object_safe(I_sd); - release_mem_object_safe(I_sd_DL_shadow); - release_mem_object_safe(shader_sd); - release_mem_object_safe(shader_sd_DL_shadow); - release_mem_object_safe(flag_sd); - release_mem_object_safe(flag_sd_DL_shadow); - release_mem_object_safe(prim_sd); - release_mem_object_safe(prim_sd_DL_shadow); - release_mem_object_safe(type_sd); - release_mem_object_safe(type_sd_DL_shadow); - release_mem_object_safe(u_sd); - release_mem_object_safe(u_sd_DL_shadow); - release_mem_object_safe(v_sd); - release_mem_object_safe(v_sd_DL_shadow); - release_mem_object_safe(object_sd); - release_mem_object_safe(object_sd_DL_shadow); - release_mem_object_safe(time_sd); - release_mem_object_safe(time_sd_DL_shadow); - release_mem_object_safe(ray_length_sd); - release_mem_object_safe(ray_length_sd_DL_shadow); - - /* Ray differentials. */ - release_mem_object_safe(dP_sd); - release_mem_object_safe(dP_sd_DL_shadow); - release_mem_object_safe(dI_sd); - release_mem_object_safe(dI_sd_DL_shadow); - release_mem_object_safe(du_sd); - release_mem_object_safe(du_sd_DL_shadow); - release_mem_object_safe(dv_sd); - release_mem_object_safe(dv_sd_DL_shadow); - - /* Dp/Du */ - release_mem_object_safe(dPdu_sd); - release_mem_object_safe(dPdu_sd_DL_shadow); - release_mem_object_safe(dPdv_sd); - release_mem_object_safe(dPdv_sd_DL_shadow); - - /* Object motion. */ - release_mem_object_safe(ob_tfm_sd); - release_mem_object_safe(ob_itfm_sd); - - release_mem_object_safe(ob_tfm_sd_DL_shadow); - release_mem_object_safe(ob_itfm_sd_DL_shadow); - - release_mem_object_safe(closure_sd); - release_mem_object_safe(closure_sd_DL_shadow); - release_mem_object_safe(num_closure_sd); - release_mem_object_safe(num_closure_sd_DL_shadow); - release_mem_object_safe(randb_closure_sd); - release_mem_object_safe(randb_closure_sd_DL_shadow); - release_mem_object_safe(ray_P_sd); - release_mem_object_safe(ray_P_sd_DL_shadow); - release_mem_object_safe(ray_dP_sd); - release_mem_object_safe(ray_dP_sd_DL_shadow); release_mem_object_safe(rng_coop); release_mem_object_safe(throughput_coop); release_mem_object_safe(L_transparent_coop); @@ -2572,7 +2378,7 @@ public: /* TODO(sergey): This will actually over-allocate if * particular kernel does not support multiclosure. */ - size_t ShaderClosure_size = get_shader_closure_size(current_max_closure); + size_t shaderdata_size = get_shader_data_size(current_max_closure); #ifdef __WORK_STEALING__ /* Calculate max groups */ @@ -2593,67 +2399,8 @@ public: kgbuffer = mem_alloc(get_KernelGlobals_size()); /* Create global buffers for ShaderData. */ - sd = mem_alloc(get_shaderdata_soa_size()); - sd_DL_shadow = mem_alloc(get_shaderdata_soa_size()); - P_sd = mem_alloc(num_global_elements * sizeof(float3)); - P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - N_sd = mem_alloc(num_global_elements * sizeof(float3)); - N_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - Ng_sd = mem_alloc(num_global_elements * sizeof(float3)); - Ng_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - I_sd = mem_alloc(num_global_elements * sizeof(float3)); - I_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - shader_sd = mem_alloc(num_global_elements * sizeof(int)); - shader_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - flag_sd = mem_alloc(num_global_elements * sizeof(int)); - flag_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - prim_sd = mem_alloc(num_global_elements * sizeof(int)); - prim_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - type_sd = mem_alloc(num_global_elements * sizeof(int)); - type_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - u_sd = mem_alloc(num_global_elements * sizeof(float)); - u_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - v_sd = mem_alloc(num_global_elements * sizeof(float)); - v_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - object_sd = mem_alloc(num_global_elements * sizeof(int)); - object_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - time_sd = mem_alloc(num_global_elements * sizeof(float)); - time_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - ray_length_sd = mem_alloc(num_global_elements * sizeof(float)); - ray_length_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - - /* Ray differentials. */ - dP_sd = mem_alloc(num_global_elements * sizeof(differential3)); - dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); - dI_sd = mem_alloc(num_global_elements * sizeof(differential3)); - dI_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); - du_sd = mem_alloc(num_global_elements * sizeof(differential)); - du_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential)); - dv_sd = mem_alloc(num_global_elements * sizeof(differential)); - dv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential)); - - /* Dp/Du */ - dPdu_sd = mem_alloc(num_global_elements * sizeof(float3)); - dPdu_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - dPdv_sd = mem_alloc(num_global_elements * sizeof(float3)); - dPdv_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - - /* Object motion. */ - ob_tfm_sd = mem_alloc(num_global_elements * sizeof(Transform)); - ob_tfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform)); - ob_itfm_sd = mem_alloc(num_global_elements * sizeof(Transform)); - ob_itfm_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(Transform)); - - closure_sd = mem_alloc(num_global_elements * ShaderClosure_size); - closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * ShaderClosure_size); - num_closure_sd = mem_alloc(num_global_elements * sizeof(int)); - num_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(int)); - randb_closure_sd = mem_alloc(num_global_elements * sizeof(float)); - randb_closure_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float)); - ray_P_sd = mem_alloc(num_global_elements * sizeof(float3)); - ray_P_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(float3)); - ray_dP_sd = mem_alloc(num_global_elements * sizeof(differential3)); - ray_dP_sd_DL_shadow = mem_alloc(num_global_elements * 2 * sizeof(differential3)); + sd = mem_alloc(num_global_elements * shaderdata_size); + sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size); /* Creation of global memory buffers which are shared among * the kernels. @@ -2694,79 +2441,7 @@ public: kernel_set_args(ckPathTraceKernel_data_init, 0, kgbuffer, - sd, sd_DL_shadow, - P_sd, - P_sd_DL_shadow, - N_sd, - N_sd_DL_shadow, - Ng_sd, - Ng_sd_DL_shadow, - I_sd, - I_sd_DL_shadow, - shader_sd, - shader_sd_DL_shadow, - flag_sd, - flag_sd_DL_shadow, - prim_sd, - prim_sd_DL_shadow, - type_sd, - type_sd_DL_shadow, - u_sd, - u_sd_DL_shadow, - v_sd, - v_sd_DL_shadow, - object_sd, - object_sd_DL_shadow, - time_sd, - time_sd_DL_shadow, - ray_length_sd, - ray_length_sd_DL_shadow); - - /* Ray differentials. */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - dP_sd, - dP_sd_DL_shadow, - dI_sd, - dI_sd_DL_shadow, - du_sd, - du_sd_DL_shadow, - dv_sd, - dv_sd_DL_shadow); - - /* Dp/Du */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - dPdu_sd, - dPdu_sd_DL_shadow, - dPdv_sd, - dPdv_sd_DL_shadow); - - /* Object motion. */ - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - ob_tfm_sd, - ob_tfm_sd_DL_shadow, - ob_itfm_sd, - ob_itfm_sd_DL_shadow); - - start_arg_index += - kernel_set_args(ckPathTraceKernel_data_init, - start_arg_index, - closure_sd, - closure_sd_DL_shadow, - num_closure_sd, - num_closure_sd_DL_shadow, - randb_closure_sd, - randb_closure_sd_DL_shadow, - ray_P_sd, - ray_P_sd_DL_shadow, - ray_dP_sd, - ray_dP_sd_DL_shadow, d_data, per_sample_output_buffers, d_rng_state, @@ -3132,16 +2807,12 @@ public: { size_t total_invariable_mem_allocated = 0; size_t KernelGlobals_size = 0; - size_t ShaderData_SOA_size = 0; KernelGlobals_size = get_KernelGlobals_size(); - ShaderData_SOA_size = get_shaderdata_soa_size(); total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */ total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */ total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */ - total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */ - total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_DL_shadow size */ return total_invariable_mem_allocated; } @@ -3208,13 +2879,11 @@ public: /* Calculate the memory required for one thread in split kernel. */ size_t get_per_thread_memory() { - size_t shader_closure_size = 0; - size_t shaderdata_volume = 0; - shader_closure_size = get_shader_closure_size(current_max_closure); + size_t shaderdata_size = 0; /* TODO(sergey): This will actually over-allocate if * particular kernel does not support multiclosure. */ - shaderdata_volume = get_shader_data_size(shader_closure_size); + shaderdata_size = get_shader_data_size(current_max_closure); size_t retval = sizeof(RNG) + sizeof(float3) /* Throughput size */ + sizeof(float) /* L transparent size */ @@ -3225,8 +2894,8 @@ public: + sizeof(Intersection) /* Overall isect */ + sizeof(Intersection) /* Instersection_coop_AO */ + sizeof(Intersection) /* Intersection coop DL */ - + shaderdata_volume /* Overall ShaderData */ - + (shaderdata_volume * 2) /* ShaderData : DL and shadow */ + + shaderdata_size /* Overall ShaderData */ + + (shaderdata_size * 2) /* ShaderData : DL and shadow */ + sizeof(Ray) + sizeof(BsdfEval) + sizeof(float3) /* AOAlpha size */ + sizeof(float3) /* AOBSDF size */ diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 5249416e838..b5b0b1f3fae 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -55,7 +55,6 @@ set(SRC_HEADERS kernel_queues.h kernel_random.h kernel_shader.h - kernel_shaderdata_vars.h kernel_shadow.h kernel_subsurface.h kernel_textures.h diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h deleted file mode 100644 index 3f63f94912e..00000000000 --- a/intern/cycles/kernel/kernel_shaderdata_vars.h +++ /dev/null @@ -1,94 +0,0 @@ -/* -* Copyright 2011-2015 Blender Foundation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#ifndef SD_VAR -#define SD_VAR(type, what) -#endif -#ifndef SD_CLOSURE_VAR -#define SD_CLOSURE_VAR(type, what, max_closure) -#endif - -/* position */ -SD_VAR(float3, P) -/* smooth normal for shading */ -SD_VAR(float3, N) -/* true geometric normal */ -SD_VAR(float3, Ng) -/* view/incoming direction */ -SD_VAR(float3, I) -/* shader id */ -SD_VAR(int, shader) -/* booleans describing shader, see ShaderDataFlag */ -SD_VAR(int, flag) - -/* primitive id if there is one, ~0 otherwise */ -SD_VAR(int, prim) - -/* combined type and curve segment for hair */ -SD_VAR(int, type) - -/* parametric coordinates -* - barycentric weights for triangles */ -SD_VAR(float, u) -SD_VAR(float, v) -/* object id if there is one, ~0 otherwise */ -SD_VAR(int, object) - -/* motion blur sample time */ -SD_VAR(float, time) - -/* length of the ray being shaded */ -SD_VAR(float, ray_length) - -#ifdef __RAY_DIFFERENTIALS__ -/* differential of P. these are orthogonal to Ng, not N */ -SD_VAR(differential3, dP) -/* differential of I */ -SD_VAR(differential3, dI) -/* differential of u, v */ -SD_VAR(differential, du) -SD_VAR(differential, dv) -#endif -#ifdef __DPDU__ -/* differential of P w.r.t. parametric coordinates. note that dPdu is -* not readily suitable as a tangent for shading on triangles. */ -SD_VAR(float3, dPdu) -SD_VAR(float3, dPdv) -#endif - -#ifdef __OBJECT_MOTION__ -/* object <-> world space transformations, cached to avoid -* re-interpolating them constantly for shading */ -SD_VAR(Transform, ob_tfm) -SD_VAR(Transform, ob_itfm) -#endif - -/* Closure data, we store a fixed array of closures */ -SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE) -SD_VAR(int, num_closure) -SD_VAR(float, randb_closure) - -/* ray start position, only set for backgrounds */ -SD_VAR(float3, ray_P) -SD_VAR(differential3, ray_dP) - -#ifdef __OSL__ -SD_VAR(struct KernelGlobals *, osl_globals) -SD_VAR(struct PathState *, osl_path_state) -#endif - -#undef SD_VAR -#undef SD_CLOSURE_VAR diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index aefa5d5be85..3b1111e5069 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -201,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, #endif #ifdef __SPLIT_KERNEL__ - Intersection *isect = &kg->isect_shadow[TIDX]; + Intersection *isect = &kg->isect_shadow[SD_THREAD]; #else Intersection isect_object; Intersection *isect = &isect_object; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index da2416bc09b..49d8859c7de 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -718,22 +718,99 @@ enum ShaderDataFlag { struct KernelGlobals; #ifdef __SPLIT_KERNEL__ -#define SD_VAR(type, what) ccl_global type *what; -#define SD_CLOSURE_VAR(type, what, max_closure) type *what; -#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0)) -#define ccl_fetch(s, t) (s->t[TIDX]) -#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index]) +# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0)) +# if defined(__SPLIT_KERNEL_AOS__) + /* ShaderData is stored as an Array-of-Structures */ +# define ccl_fetch(s, t) (s[SD_THREAD].t) +# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].t[index]) +# else + /* ShaderData is stored as an Structure-of-Arrays */ +# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1)) +# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t) +# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0) +# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(t) + SD_FIELD_SIZE(t) * SD_THREAD - SD_OFFSETOF(t)))->t) +# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index]) +# endif #else -#define SD_VAR(type, what) type what; -#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure]; -#define ccl_fetch(s, t) (s->t) -#define ccl_fetch_array(s, t, index) (&s->t[index]) +# define ccl_fetch(s, t) (s->t) +# define ccl_fetch_array(s, t, index) (&s->t[index]) #endif typedef ccl_addr_space struct ShaderData { + /* position */ + float3 P; + /* smooth normal for shading */ + float3 N; + /* true geometric normal */ + float3 Ng; + /* view/incoming direction */ + float3 I; + /* shader id */ + int shader; + /* booleans describing shader, see ShaderDataFlag */ + int flag; + + /* primitive id if there is one, ~0 otherwise */ + int prim; + + /* combined type and curve segment for hair */ + int type; + + /* parametric coordinates + * - barycentric weights for triangles */ + float u; + float v; + /* object id if there is one, ~0 otherwise */ + int object; + + /* motion blur sample time */ + float time; + + /* length of the ray being shaded */ + float ray_length; -#include "kernel_shaderdata_vars.h" + /* ray bounce depth */ + int ray_depth; + /* ray transparent depth */ + int transparent_depth; + +#ifdef __RAY_DIFFERENTIALS__ + /* differential of P. these are orthogonal to Ng, not N */ + differential3 dP; + /* differential of I */ + differential3 dI; + /* differential of u, v */ + differential du; + differential dv; +#endif +#ifdef __DPDU__ + /* differential of P w.r.t. parametric coordinates. note that dPdu is + * not readily suitable as a tangent for shading on triangles. */ + float3 dPdu; + float3 dPdv; +#endif + +#ifdef __OBJECT_MOTION__ + /* object <-> world space transformations, cached to avoid + * re-interpolating them constantly for shading */ + Transform ob_tfm; + Transform ob_itfm; +#endif + + /* Closure data, we store a fixed array of closures */ + struct ShaderClosure closure[MAX_CLOSURE]; + int num_closure; + float randb_closure; + + /* ray start position, only set for backgrounds */ + float3 ray_P; + differential3 ray_dP; + +#ifdef __OSL__ + struct KernelGlobals * osl_globals; + struct PathState *osl_path_state; +#endif } ShaderData; /* Path State */ diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 401c4467afa..18139687eab 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -17,91 +17,8 @@ #include "split/kernel_data_init.h" __kernel void kernel_ocl_path_trace_data_init( - ccl_global char *kg, - ccl_global char *sd, + ccl_global char *globals, ccl_global char *sd_DL_shadow, - - ccl_global float3 *P_sd, - ccl_global float3 *P_sd_DL_shadow, - - ccl_global float3 *N_sd, - ccl_global float3 *N_sd_DL_shadow, - - ccl_global float3 *Ng_sd, - ccl_global float3 *Ng_sd_DL_shadow, - - ccl_global float3 *I_sd, - ccl_global float3 *I_sd_DL_shadow, - - ccl_global int *shader_sd, - ccl_global int *shader_sd_DL_shadow, - - ccl_global int *flag_sd, - ccl_global int *flag_sd_DL_shadow, - - ccl_global int *prim_sd, - ccl_global int *prim_sd_DL_shadow, - - ccl_global int *type_sd, - ccl_global int *type_sd_DL_shadow, - - ccl_global float *u_sd, - ccl_global float *u_sd_DL_shadow, - - ccl_global float *v_sd, - ccl_global float *v_sd_DL_shadow, - - ccl_global int *object_sd, - ccl_global int *object_sd_DL_shadow, - - ccl_global float *time_sd, - ccl_global float *time_sd_DL_shadow, - - ccl_global float *ray_length_sd, - ccl_global float *ray_length_sd_DL_shadow, - - /* Ray differentials. */ - ccl_global differential3 *dP_sd, - ccl_global differential3 *dP_sd_DL_shadow, - - ccl_global differential3 *dI_sd, - ccl_global differential3 *dI_sd_DL_shadow, - - ccl_global differential *du_sd, - ccl_global differential *du_sd_DL_shadow, - - ccl_global differential *dv_sd, - ccl_global differential *dv_sd_DL_shadow, - - /* Dp/Du */ - ccl_global float3 *dPdu_sd, - ccl_global float3 *dPdu_sd_DL_shadow, - - ccl_global float3 *dPdv_sd, - ccl_global float3 *dPdv_sd_DL_shadow, - - /* Object motion. */ - ccl_global Transform *ob_tfm_sd, - ccl_global Transform *ob_tfm_sd_DL_shadow, - - ccl_global Transform *ob_itfm_sd, - ccl_global Transform *ob_itfm_sd_DL_shadow, - - ShaderClosure *closure_sd, - ShaderClosure *closure_sd_DL_shadow, - - ccl_global int *num_closure_sd, - ccl_global int *num_closure_sd_DL_shadow, - - ccl_global float *randb_closure_sd, - ccl_global float *randb_closure_sd_DL_shadow, - - ccl_global float3 *ray_P_sd, - ccl_global float3 *ray_P_sd_DL_shadow, - - ccl_global differential3 *ray_dP_sd, - ccl_global differential3 *ray_dP_sd_DL_shadow, - ccl_constant KernelData *data, ccl_global float *per_sample_output_buffers, ccl_global uint *rng_state, @@ -136,68 +53,8 @@ __kernel void kernel_ocl_path_trace_data_init( #endif int parallel_samples) /* Number of samples to be processed in parallel */ { - kernel_data_init((KernelGlobals *)kg, - (ShaderData *)sd, + kernel_data_init((KernelGlobals *)globals, (ShaderData *)sd_DL_shadow, - P_sd, - P_sd_DL_shadow, - N_sd, - N_sd_DL_shadow, - Ng_sd, - Ng_sd_DL_shadow, - I_sd, - I_sd_DL_shadow, - shader_sd, - shader_sd_DL_shadow, - flag_sd, - flag_sd_DL_shadow, - prim_sd, - prim_sd_DL_shadow, - type_sd, - type_sd_DL_shadow, - u_sd, - u_sd_DL_shadow, - v_sd, - v_sd_DL_shadow, - object_sd, - object_sd_DL_shadow, - time_sd, - time_sd_DL_shadow, - ray_length_sd, - ray_length_sd_DL_shadow, - - /* Ray differentials. */ - dP_sd, - dP_sd_DL_shadow, - dI_sd, - dI_sd_DL_shadow, - du_sd, - du_sd_DL_shadow, - dv_sd, - dv_sd_DL_shadow, - - /* Dp/Du */ - dPdu_sd, - dPdu_sd_DL_shadow, - dPdv_sd, - dPdv_sd_DL_shadow, - - /* Object motion. */ - ob_tfm_sd, - ob_tfm_sd_DL_shadow, - ob_itfm_sd, - ob_itfm_sd_DL_shadow, - - closure_sd, - closure_sd_DL_shadow, - num_closure_sd, - num_closure_sd_DL_shadow, - randb_closure_sd, - randb_closure_sd_DL_shadow, - ray_P_sd, - ray_P_sd_DL_shadow, - ray_dP_sd, - ray_dP_sd_DL_shadow, data, per_sample_output_buffers, rng_state, diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 6993a78a789..9891391a3a3 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -52,90 +52,7 @@ */ ccl_device void kernel_data_init( KernelGlobals *kg, - ShaderData *sd, ShaderData *sd_DL_shadow, - - ccl_global float3 *P_sd, - ccl_global float3 *P_sd_DL_shadow, - - ccl_global float3 *N_sd, - ccl_global float3 *N_sd_DL_shadow, - - ccl_global float3 *Ng_sd, - ccl_global float3 *Ng_sd_DL_shadow, - - ccl_global float3 *I_sd, - ccl_global float3 *I_sd_DL_shadow, - - ccl_global int *shader_sd, - ccl_global int *shader_sd_DL_shadow, - - ccl_global int *flag_sd, - ccl_global int *flag_sd_DL_shadow, - - ccl_global int *prim_sd, - ccl_global int *prim_sd_DL_shadow, - - ccl_global int *type_sd, - ccl_global int *type_sd_DL_shadow, - - ccl_global float *u_sd, - ccl_global float *u_sd_DL_shadow, - - ccl_global float *v_sd, - ccl_global float *v_sd_DL_shadow, - - ccl_global int *object_sd, - ccl_global int *object_sd_DL_shadow, - - ccl_global float *time_sd, - ccl_global float *time_sd_DL_shadow, - - ccl_global float *ray_length_sd, - ccl_global float *ray_length_sd_DL_shadow, - - /* Ray differentials. */ - ccl_global differential3 *dP_sd, - ccl_global differential3 *dP_sd_DL_shadow, - - ccl_global differential3 *dI_sd, - ccl_global differential3 *dI_sd_DL_shadow, - - ccl_global differential *du_sd, - ccl_global differential *du_sd_DL_shadow, - - ccl_global differential *dv_sd, - ccl_global differential *dv_sd_DL_shadow, - - /* Dp/Du */ - ccl_global float3 *dPdu_sd, - ccl_global float3 *dPdu_sd_DL_shadow, - - ccl_global float3 *dPdv_sd, - ccl_global float3 *dPdv_sd_DL_shadow, - - /* Object motion. */ - ccl_global Transform *ob_tfm_sd, - ccl_global Transform *ob_tfm_sd_DL_shadow, - - ccl_global Transform *ob_itfm_sd, - ccl_global Transform *ob_itfm_sd_DL_shadow, - - ShaderClosure *closure_sd, - ShaderClosure *closure_sd_DL_shadow, - - ccl_global int *num_closure_sd, - ccl_global int *num_closure_sd_DL_shadow, - - ccl_global float *randb_closure_sd, - ccl_global float *randb_closure_sd_DL_shadow, - - ccl_global float3 *ray_P_sd, - ccl_global float3 *ray_P_sd_DL_shadow, - - ccl_global differential3 *ray_dP_sd, - ccl_global differential3 *ray_dP_sd_DL_shadow, - ccl_constant KernelData *data, ccl_global float *per_sample_output_buffers, ccl_global uint *rng_state, @@ -177,89 +94,6 @@ ccl_device void kernel_data_init( kg->name = name; #include "../kernel_textures.h" - sd->P = P_sd; - sd_DL_shadow->P = P_sd_DL_shadow; - - sd->N = N_sd; - sd_DL_shadow->N = N_sd_DL_shadow; - - sd->Ng = Ng_sd; - sd_DL_shadow->Ng = Ng_sd_DL_shadow; - - sd->I = I_sd; - sd_DL_shadow->I = I_sd_DL_shadow; - - sd->shader = shader_sd; - sd_DL_shadow->shader = shader_sd_DL_shadow; - - sd->flag = flag_sd; - sd_DL_shadow->flag = flag_sd_DL_shadow; - - sd->prim = prim_sd; - sd_DL_shadow->prim = prim_sd_DL_shadow; - - sd->type = type_sd; - sd_DL_shadow->type = type_sd_DL_shadow; - - sd->u = u_sd; - sd_DL_shadow->u = u_sd_DL_shadow; - - sd->v = v_sd; - sd_DL_shadow->v = v_sd_DL_shadow; - - sd->object = object_sd; - sd_DL_shadow->object = object_sd_DL_shadow; - - sd->time = time_sd; - sd_DL_shadow->time = time_sd_DL_shadow; - - sd->ray_length = ray_length_sd; - sd_DL_shadow->ray_length = ray_length_sd_DL_shadow; - -#ifdef __RAY_DIFFERENTIALS__ - sd->dP = dP_sd; - sd_DL_shadow->dP = dP_sd_DL_shadow; - - sd->dI = dI_sd; - sd_DL_shadow->dI = dI_sd_DL_shadow; - - sd->du = du_sd; - sd_DL_shadow->du = du_sd_DL_shadow; - - sd->dv = dv_sd; - sd_DL_shadow->dv = dv_sd_DL_shadow; -#ifdef __DPDU__ - sd->dPdu = dPdu_sd; - sd_DL_shadow->dPdu = dPdu_sd_DL_shadow; - - sd->dPdv = dPdv_sd; - sd_DL_shadow->dPdv = dPdv_sd_DL_shadow; -#endif -#endif - -#ifdef __OBJECT_MOTION__ - sd->ob_tfm = ob_tfm_sd; - sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow; - - sd->ob_itfm = ob_itfm_sd; - sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow; -#endif - - sd->closure = closure_sd; - sd_DL_shadow->closure = closure_sd_DL_shadow; - - sd->num_closure = num_closure_sd; - sd_DL_shadow->num_closure = num_closure_sd_DL_shadow; - - sd->randb_closure = randb_closure_sd; - sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow; - - sd->ray_P = ray_P_sd; - sd_DL_shadow->ray_P = ray_P_sd_DL_shadow; - - sd->ray_dP = ray_dP_sd; - sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow; - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); #ifdef __WORK_STEALING__ |