From 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 1 Nov 2017 21:02:28 +0100 Subject: Cycles: reduce closure memory usage for emission/shadow shader data. With a Titan Xp, reduces path trace local memory from 1092MB to 840MB. Benchmark performance was within 1% with both RX 480 and Titan Xp. Original patch was implemented by Sergey. Differential Revision: https://developer.blender.org/D2249 --- intern/cycles/kernel/closure/alloc.h | 17 +++++++---------- intern/cycles/kernel/kernel_bake.h | 10 +++++----- intern/cycles/kernel/kernel_emission.h | 7 ++----- intern/cycles/kernel/kernel_path.h | 16 +++++++++------- intern/cycles/kernel/kernel_path_branched.h | 20 +++++++++++--------- intern/cycles/kernel/kernel_shader.h | 13 +++++++------ intern/cycles/kernel/kernel_shadow.h | 3 ++- intern/cycles/kernel/kernel_subsurface.h | 4 ++-- intern/cycles/kernel/kernel_types.h | 7 ++++++- intern/cycles/kernel/kernel_volume.h | 4 ++-- intern/cycles/kernel/split/kernel_buffer_update.h | 7 ++++++- intern/cycles/kernel/split/kernel_direct_lighting.h | 11 ++++++++++- intern/cycles/kernel/split/kernel_do_volume.h | 4 ++-- ...el_holdout_emission_blurring_pathtermination_ao.h | 2 +- intern/cycles/kernel/split/kernel_path_init.h | 2 +- intern/cycles/kernel/split/kernel_shader_eval.h | 2 +- .../cycles/kernel/split/kernel_shadow_blocked_ao.h | 2 +- .../cycles/kernel/split/kernel_shadow_blocked_dl.h | 2 +- intern/cycles/kernel/split/kernel_split_data_types.h | 4 ++-- .../cycles/kernel/split/kernel_subsurface_scatter.h | 4 ++-- 20 files changed, 80 insertions(+), 61 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index e799855a65e..48a60405b5a 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -20,17 +20,16 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty { kernel_assert(size <= sizeof(ShaderClosure)); - int num_closure = sd->num_closure; - int num_closure_extra = sd->num_closure_extra; - if(num_closure + num_closure_extra >= MAX_CLOSURE) + if(sd->num_closure_left == 0) return NULL; - ShaderClosure *sc = &sd->closure[num_closure]; + ShaderClosure *sc = &sd->closure[sd->num_closure]; sc->type = type; sc->weight = weight; sd->num_closure++; + sd->num_closure_left--; return sc; } @@ -44,18 +43,16 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) * This lets us keep the same fast array iteration over closures, as we * found linked list iteration and iteration with skipping to be slower. */ int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); - int num_closure = sd->num_closure; - int num_closure_extra = sd->num_closure_extra + num_extra; - if(num_closure + num_closure_extra > MAX_CLOSURE) { + if(num_extra > sd->num_closure_left) { /* Remove previous closure. */ sd->num_closure--; - sd->num_closure_extra++; + sd->num_closure_left++; return NULL; } - sd->num_closure_extra = num_closure_extra; - return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra); + sd->num_closure_left -= num_extra; + return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left); } ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight) diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 84d8d84d486..9ce10358b81 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg, path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); /* evaluate surface shader */ - shader_eval_surface(kg, sd, &state, state.flag); + shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE); /* TODO, disable more closures we don't need besides transparent */ shader_bsdf_disable_transparency(kg, sd); @@ -239,12 +239,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, } else { /* surface color of the pass only */ - shader_eval_surface(kg, sd, state, 0); + shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE); return kernel_bake_shader_bsdf(kg, sd, type); } } else { - shader_eval_surface(kg, sd, state, 0); + shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE); color = kernel_bake_shader_bsdf(kg, sd, type); } @@ -337,7 +337,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, { float3 N = sd.N; if((sd.flag & SD_HAS_BUMP)) { - shader_eval_surface(kg, &sd, &state, 0); + shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE); N = shader_bsdf_average_normal(kg, &sd); } @@ -352,7 +352,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, } case SHADER_EVAL_EMISSION: { - shader_eval_surface(kg, &sd, &state, 0); + shader_eval_surface(kg, &sd, &state, 0, 0); out = shader_emissive_eval(kg, &sd); break; } diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 45b8c6311e1..94b0a37ce62 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -70,14 +70,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, /* no path flag, we're evaluating this for all closures. that's weak but * we'd have to do multiple evaluations otherwise */ path_state_modify_bounce(state, true); - shader_eval_surface(kg, emission_sd, state, 0); + shader_eval_surface(kg, emission_sd, state, 0, 0); path_state_modify_bounce(state, false); /* evaluate emissive closure */ - if(emission_sd->flag & SD_EMISSION) - eval = shader_emissive_eval(kg, emission_sd); - else - eval = make_float3(0.0f, 0.0f, 0.0f); + eval = shader_emissive_eval(kg, emission_sd); } eval *= ls->eval_fac; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 1099064038b..8519e0682e1 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, sd, &isect, ray); - shader_eval_surface(kg, sd, state, state->flag); + shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE); shader_prepare_closures(sd, state); /* Apply shadow catcher, holdout, emission. */ @@ -561,7 +561,7 @@ ccl_device_forceinline void kernel_path_integrate( bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); /* Find intersection with lamps and compute emission for MIS. */ - kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L); + kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L); #ifdef __VOLUME__ /* Volume integration. */ @@ -585,7 +585,7 @@ ccl_device_forceinline void kernel_path_integrate( /* Shade background. */ if(!hit) { - kernel_path_background(kg, state, ray, throughput, emission_sd, L); + kernel_path_background(kg, state, ray, throughput, &sd, L); break; } else if(path_state_ao_bounce(kg, state)) { @@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate( /* Setup and evaluate shader. */ shader_setup_from_ray(kg, &sd, &isect, ray); - shader_eval_surface(kg, &sd, state, state->flag); + shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE); shader_prepare_closures(&sd, state); /* Apply shadow catcher, holdout, emission. */ @@ -706,9 +706,11 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, PathRadiance L; path_radiance_init(&L, kernel_data.film.use_light_pass); - ShaderData emission_sd; + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + PathState state; - path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray); + path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); /* Integrate. */ kernel_path_integrate(kg, @@ -717,7 +719,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, &ray, &L, buffer, - &emission_sd); + emission_sd); kernel_write_result(kg, buffer, sample, &L); } diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 3877e4f0058..f93366eade1 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -436,10 +436,12 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, /* shader data memory used for both volumes and surfaces, saves stack space */ ShaderData sd; /* shader data used by emission, shadows, volume stacks, indirect path */ - ShaderData emission_sd, indirect_sd; + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + ShaderData indirect_sd; PathState state; - path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray); + path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); /* Main Loop * Here we only handle transparency intersections from the camera ray. @@ -460,7 +462,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, &isect, hit, &indirect_sd, - &emission_sd, + emission_sd, L); #endif /* __VOLUME__ */ @@ -472,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, /* Setup and evaluate shader. */ shader_setup_from_ray(kg, &sd, &isect, &ray); - shader_eval_surface(kg, &sd, &state, state.flag); + shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE); shader_merge_closures(&sd); /* Apply shadow catcher, holdout, emission. */ @@ -481,7 +483,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, &state, &ray, throughput, - &emission_sd, + emission_sd, L, buffer)) { @@ -513,14 +515,14 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput); + kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput); } #endif /* __AO__ */ #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object */ if(sd.flag & SD_BSSRDF) { - kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd, + kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput); } #endif /* __SUBSURFACE__ */ @@ -534,13 +536,13 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, int all = (kernel_data.integrator.sample_all_lights_direct) || (state.flag & PATH_RAY_SHADOW_CATCHER); kernel_branched_path_surface_connect_light(kg, - &sd, &emission_sd, &hit_state, throughput, 1.0f, L, all); + &sd, emission_sd, &hit_state, throughput, 1.0f, L, all); } #endif /* __EMISSION__ */ /* indirect light */ kernel_branched_path_surface_indirect_light(kg, - &sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L); + &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L); /* continue in case of transparency */ throughput *= shader_bsdf_transparency(kg, &sd); diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 1ba37eda8cc..42f8737555e 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -955,10 +955,10 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) /* Surface Evaluation */ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, - ccl_addr_space PathState *state, int path_flag) + ccl_addr_space PathState *state, int path_flag, int max_closure) { sd->num_closure = 0; - sd->num_closure_extra = 0; + sd->num_closure_left = max_closure; #ifdef __OSL__ if(kg->osl) @@ -988,7 +988,7 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int path_flag) { sd->num_closure = 0; - sd->num_closure_extra = 0; + sd->num_closure_left = 0; #ifdef __SVM__ # ifdef __OSL__ @@ -1129,12 +1129,13 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ccl_addr_space VolumeStack *stack, - int path_flag) + int path_flag, + int max_closure) { /* reset closures once at the start, we will be accumulating the closures * for all volumes in the stack into a single array of closures */ sd->num_closure = 0; - sd->num_closure_extra = 0; + sd->num_closure_left = max_closure; sd->flag = 0; sd->object_flag = 0; @@ -1184,7 +1185,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg, ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state) { sd->num_closure = 0; - sd->num_closure_extra = 0; + sd->num_closure_left = 0; /* this will modify sd->P */ #ifdef __SVM__ diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 8a0da6c3b13..ab364d3037a 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -86,7 +86,8 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( shader_eval_surface(kg, shadow_sd, state, - PATH_RAY_SHADOW); + PATH_RAY_SHADOW, + 0); path_state_modify_bounce(state, false); *throughput *= shader_bsdf_transparency(kg, shadow_sd); } diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 23a09e5e2ca..6f75601d8c6 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -80,7 +80,7 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, const Shad { sd->flag &= ~SD_CLOSURE_FLAGS; sd->num_closure = 0; - sd->num_closure_extra = 0; + sd->num_closure_left = MAX_CLOSURE; if(hit) { Bssrdf *bssrdf = (Bssrdf *)sc; @@ -154,7 +154,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, if(bump || texture_blur > 0.0f) { /* average color and normal at incoming point */ - shader_eval_surface(kg, sd, state, state_flag); + shader_eval_surface(kg, sd, state, state_flag, MAX_CLOSURE); float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL); /* we simply divide out the average color and multiply with the average diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index cac3ef226f9..6d177816a98 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -984,7 +984,7 @@ typedef ccl_addr_space struct ShaderData { /* Closure data, we store a fixed array of closures */ int num_closure; - int num_closure_extra; + int num_closure_left; float randb_closure; float3 svm_closure_weight; @@ -997,6 +997,11 @@ typedef ccl_addr_space struct ShaderData { struct ShaderClosure closure[MAX_CLOSURE]; } ShaderData; +typedef ccl_addr_space struct ShaderDataTinyStorage { + char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; +} ShaderDataTinyStorage; +#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage) + /* Path State */ #ifdef __VOLUME__ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 89af16aa9be..fb3c5437275 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -43,7 +43,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, float3 *extinction) { sd->P = P; - shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW); + shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, 0); if(sd->flag & SD_EXTINCTION) { *extinction = sd->closure_transparent_extinction; @@ -62,7 +62,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, VolumeShaderCoefficients *coeff) { sd->P = P; - shader_eval_volume(kg, sd, state, state->volume_stack, state->flag); + shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, MAX_CLOSURE); if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION))) return false; diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 511334e0550..180c0b57077 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -122,7 +122,12 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, */ *throughput = make_float3(1.0f, 1.0f, 1.0f); path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray); + path_state_init(kg, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + state, + rng_hash, + sample, + ray); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index 2aac66ecb84..832b0e5b265 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -98,7 +98,16 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, BsdfEval L_light; bool is_lamp; - if(direct_emission(kg, sd, &kernel_split_state.sd_DL_shadow[ray_index], &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + if(direct_emission(kg, + sd, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + &ls, + state, + &light_ray, + &L_light, + &is_lamp, + terminate)) + { /* Write intermediate data to global memory to access from * the next kernel. */ diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 491487f1230..02881da6c04 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -31,7 +31,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K ShaderData *sd = &kernel_split_state.sd[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); /* GPU: no decoupled ray marching, scatter probalistically */ int num_samples = kernel_data.integrator.volume_samples; @@ -141,7 +141,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 906bad8ceb6..bc8ca3aa3ca 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -101,7 +101,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; throughput = kernel_split_state.throughput[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index 5ad62b585fe..fdd54225b07 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -64,7 +64,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); path_state_init(kg, - &kernel_split_state.sd_DL_shadow[ray_index], + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), &kernel_split_state.path_state[ray_index], rng_hash, sample, diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 7032461b04a..22602537524 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -50,7 +50,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag); + shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE); #ifdef __BRANCHED_PATH__ if(kernel_data.integrator.branched) { shader_merge_closures(&kernel_split_state.sd[ray_index]); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index 79aa2c9435b..b50de615fc8 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -34,7 +34,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) } ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; float3 throughput = kernel_split_state.throughput[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index b52f9a5eb81..9a6bdfbdffe 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -47,7 +47,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) float3 throughput = kernel_split_state.throughput[ray_index]; BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); bool is_lamp = kernel_split_state.is_lamp[ray_index]; # if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index b0e6e5f5250..d3464fede41 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -111,7 +111,7 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ @@ -127,7 +127,7 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 3b957856aea..8d774c020ee 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -39,7 +39,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it ShaderData *sd = &branched_state->sd; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; @@ -229,7 +229,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); if(sd->flag & SD_BSSRDF) { -- cgit v1.2.3