diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2016-05-22 23:35:47 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2016-05-23 23:29:24 +0300 |
commit | 999d5a67852b5958b9361c9888734ebc889e4a22 (patch) | |
tree | 5f3c5ad0409c77fc6ae3486420b3888fa1e2fea8 /intern | |
parent | af4a04eae07184f7437a8c51858a4ddb8a2e3e4c (diff) |
Cycles CUDA: reduce stack memory by reusing ShaderData.
Stack memory usage is 57% lower for the path kernel and 48% lower for the branched path kernel.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 21 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_emission.h | 41 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 77 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_branched.h | 62 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_state.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_surface.h | 22 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_volume.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 46 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_volume.h | 57 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_background_buffer_update.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_data_init.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_direct_lighting.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_lamp_emission.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked.h | 1 |
14 files changed, 196 insertions, 169 deletions
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 392cff9c281..77982ee548a 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian Ray ray; float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + /* emission shader data memory used by various functions */ + ShaderData emission_sd; + ray.P = sd->P + sd->Ng; ray.D = -sd->Ng; ray.t = FLT_MAX; @@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian path_radiance_init(&L_sample, kernel_data.film.use_light_pass); /* init path state */ - path_state_init(kg, &state, &rng, sample, NULL); + path_state_init(kg, &emission_sd, &state, &rng, sample, NULL); /* evaluate surface shader */ float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); @@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian /* sample ambient occlusion */ if(pass_filter & BAKE_FILTER_AO) { - kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput); + kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput); } /* sample emission */ @@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian kernel_path_subsurface_init_indirect(&ss_indirect); if(kernel_path_subsurface_scatter(kg, sd, + &emission_sd, &L_sample, &state, &rng, @@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian &L_sample, &throughput); kernel_path_indirect(kg, + &emission_sd, &rng, &ray, throughput, @@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian /* sample light and BSDF */ if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { - kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample); + kernel_path_surface_connect_light(kg, &rng, 
sd, &emission_sd, throughput, &state, &L_sample); if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) { #ifdef __LAMP_MIS__ state.ray_t = 0.0f; #endif /* compute indirect light */ - kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample); + kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample); /* sum and reset indirect light pass variables for the next samples */ path_radiance_sum_indirect(&L_sample); @@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian /* sample ambient occlusion */ if(pass_filter & BAKE_FILTER_AO) { - kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput); + kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput); } /* sample emission */ @@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian /* sample subsurface scattering */ if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput); + kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput); } #endif @@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian if(kernel_data.integrator.use_direct_light) { int all = kernel_data.integrator.sample_all_lights_direct; kernel_branched_path_surface_connect_light(kg, &rng, - sd, &state, throughput, 1.0f, &L_sample, all); + sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all); } #endif /* indirect light */ kernel_branched_path_surface_indirect_light(kg, &rng, - sd, throughput, 1.0f, &state, &L_sample); + sd, &emission_sd, throughput, 1.0f, &state, &L_sample); } } #endif diff --git a/intern/cycles/kernel/kernel_emission.h 
b/intern/cycles/kernel/kernel_emission.h index 5cf52f9d176..4de8e0f698a 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN /* Direction Emission */ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, + ShaderData *emission_sd, LightSample *ls, ccl_addr_space PathState *state, float3 I, @@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float time) { /* setup shading at emitter */ -#ifdef __SPLIT_KERNEL__ - ShaderData *sd = kg->sd_input; -#else - ShaderData sd_object; - ShaderData *sd = &sd_object; -#endif float3 eval; #ifdef __BACKGROUND_MIS__ @@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ray.dP = differential3_zero(); ray.dD = dI; - shader_setup_from_background(kg, sd, &ray); + shader_setup_from_background(kg, emission_sd, &ray); path_state_modify_bounce(state, true); - eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION); + eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION); path_state_modify_bounce(state, false); } else #endif { - shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time); + shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time); - ls->Ng = ccl_fetch(sd, Ng); + ls->Ng = ccl_fetch(emission_sd, Ng); /* no path flag, we're evaluating this for all closures. 
that's weak but * we'd have to do multiple evaluations otherwise */ path_state_modify_bounce(state, true); - shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION); + shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION); path_state_modify_bounce(state, false); /* evaluate emissive closure */ - if(ccl_fetch(sd, flag) & SD_EMISSION) - eval = shader_emissive_eval(kg, sd); + if(ccl_fetch(emission_sd, flag) & SD_EMISSION) + eval = shader_emissive_eval(kg, emission_sd); else eval = make_float3(0.0f, 0.0f, 0.0f); } @@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, + ShaderData *emission_sd, LightSample *ls, ccl_addr_space PathState *state, Ray *ray, @@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, /* evaluate closure */ float3 light_eval = direct_emissive_eval(kg, + emission_sd, ls, state, -ls->D, @@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader /* Indirect Lamp Emission */ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, + ShaderData *emission_sd, ccl_addr_space PathState *state, Ray *ray, float3 *emission) @@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, #endif float3 L = direct_emissive_eval(kg, + emission_sd, &ls, state, -ray->D, @@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, Ray volume_ray = *ray; volume_ray.t = ls.t; float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f); - kernel_volume_shadow(kg, state, &volume_ray, &volume_tp); + kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp); L *= volume_tp; } #endif @@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, /* Indirect Background */ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, + ShaderData *emission_sd, ccl_addr_space 
PathState *state, ccl_addr_space Ray *ray) { @@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, /* evaluate background closure */ # ifdef __SPLIT_KERNEL__ Ray priv_ray = *ray; - shader_setup_from_background(kg, kg->sd_input, &priv_ray); - - path_state_modify_bounce(state, true); - float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION); - path_state_modify_bounce(state, false); + shader_setup_from_background(kg, emission_sd, &priv_ray); # else - ShaderData sd; - shader_setup_from_background(kg, &sd, ray); + shader_setup_from_background(kg, emission_sd, ray); +# endif path_state_modify_bounce(state, true); - float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION); + float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION); path_state_modify_bounce(state, false); -# endif #ifdef __BACKGROUND_MIS__ /* check if background light exists or if we should skip pdf */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index c136c85df59..5527d8aa861 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -53,6 +53,7 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_path_indirect(KernelGlobals *kg, + ShaderData *emission_sd, RNG *rng, Ray *ray, float3 throughput, @@ -60,6 +61,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, PathState *state, PathRadiance *L) { + /* shader data memory used for both volumes and surfaces, saves stack space */ + ShaderData sd; + /* path iteration */ for(;;) { /* intersect scene */ @@ -87,7 +91,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, /* intersect with lamp */ float3 emission; - if(indirect_lamp_emission(kg, state, &light_ray, &emission)) { + if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) { path_radiance_accum_emission(L, throughput, emission, @@ -115,15 +119,14 @@ ccl_device 
void kernel_path_indirect(KernelGlobals *kg, if(decoupled) { /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; - ShaderData volume_sd; shader_setup_from_volume(kg, - &volume_sd, + &sd, &volume_ray); kernel_volume_decoupled_record(kg, state, &volume_ray, - &volume_sd, + &sd, &volume_segment, heterogeneous); @@ -146,7 +149,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, /* direct light sampling */ kernel_branched_path_volume_connect_light(kg, rng, - &volume_sd, + &sd, + emission_sd, throughput, state, L, @@ -163,7 +167,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, result = kernel_volume_decoupled_scatter(kg, state, &volume_ray, - &volume_sd, + &sd, &throughput, rphase, rscatter, @@ -178,7 +182,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, if(result == VOLUME_PATH_SCATTERED) { if(kernel_path_volume_bounce(kg, rng, - &volume_sd, + &sd, &throughput, state, L, @@ -198,16 +202,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, # endif { /* integrate along volume segment with distance sampling */ - ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous); + kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous); # ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ kernel_path_volume_connect_light(kg, rng, - &volume_sd, + &sd, + emission_sd, throughput, state, L); @@ -215,7 +219,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, /* indirect light bounce */ if(kernel_path_volume_bounce(kg, rng, - &volume_sd, + &sd, &throughput, state, L, @@ -235,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, if(!hit) { #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, state, ray); + float3 L_background = indirect_background(kg, emission_sd, state, ray); path_radiance_accum_background(L, 
throughput, L_background, @@ -246,7 +250,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, } /* setup shading */ - ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, @@ -328,7 +331,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, light_ray.dP = sd.dP; light_ray.dD = differential3_zero(); - if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) { path_radiance_accum_ao(L, throughput, ao_alpha, @@ -378,6 +381,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, kernel_branched_path_surface_connect_light(kg, rng, &sd, + emission_sd, state, throughput, 1.0f, @@ -393,6 +397,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, + ShaderData *emission_sd, PathRadiance *L, PathState *state, RNG *rng, @@ -425,7 +430,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, light_ray.dP = ccl_fetch(sd, dP); light_ray.dD = differential3_zero(); - if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce); } } @@ -435,6 +440,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, ccl_device bool kernel_path_subsurface_scatter( KernelGlobals *kg, ShaderData *sd, + ShaderData *emission_sd, PathRadiance *L, PathState *state, RNG *rng, @@ -503,7 +509,7 @@ ccl_device bool kernel_path_subsurface_scatter( hit_L->direct_throughput = L->direct_throughput; path_radiance_copy_indirect(hit_L, L); - kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L); + kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L); if(kernel_path_surface_bounce(kg, rng, @@ -526,6 +532,7 @@ ccl_device bool kernel_path_subsurface_scatter( kernel_volume_stack_update_for_subsurface( kg, + emission_sd, &volume_ray, 
hit_state->volume_stack); } @@ -604,8 +611,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, path_radiance_init(&L, kernel_data.film.use_light_pass); + /* shader data memory used for both volumes and surfaces, saves stack space */ + ShaderData sd; + /* shader data used by emission, shadows, volume stacks */ + ShaderData emission_sd; + PathState state; - path_state_init(kg, &state, rng, sample, &ray); + path_state_init(kg, &emission_sd, &state, rng, sample, &ray); #ifdef __KERNEL_DEBUG__ DebugData debug_data; @@ -669,7 +681,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, /* intersect with lamp */ float3 emission; - if(indirect_lamp_emission(kg, &state, &light_ray, &emission)) + if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission)) path_radiance_accum_emission(&L, throughput, emission, state.bounce); } #endif @@ -689,11 +701,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, if(decoupled) { /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; - ShaderData volume_sd; - shader_setup_from_volume(kg, &volume_sd, &volume_ray); + shader_setup_from_volume(kg, &sd, &volume_ray); kernel_volume_decoupled_record(kg, &state, - &volume_ray, &volume_sd, &volume_segment, heterogeneous); + &volume_ray, &sd, &volume_segment, heterogeneous); volume_segment.sampling_method = sampling_method; @@ -708,8 +719,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, int all = false; /* direct light sampling */ - kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, - throughput, &state, &L, all, &volume_ray, &volume_segment); + kernel_branched_path_volume_connect_light(kg, rng, &sd, + &emission_sd, throughput, &state, &L, all, + &volume_ray, &volume_segment); /* indirect sample. 
if we use distance sampling and take just * one sample for direct and indirect light, we could share @@ -718,7 +730,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE); result = kernel_volume_decoupled_scatter(kg, - &state, &volume_ray, &volume_sd, &throughput, + &state, &volume_ray, &sd, &throughput, rphase, rscatter, &volume_segment, NULL, true); } @@ -726,7 +738,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, kernel_volume_decoupled_free(kg, &volume_segment); if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) + if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray)) continue; else break; @@ -739,17 +751,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, # endif { /* integrate along volume segment with distance sampling */ - ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous); + kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous); # ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L); + kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) + if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray)) continue; else break; @@ -772,7 +783,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, &state, &ray); + float3 L_background = indirect_background(kg, &emission_sd, &state, &ray); path_radiance_accum_background(&L, throughput, 
L_background, state.bounce); #endif @@ -780,7 +791,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, } /* setup shading */ - ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &ray); float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF); shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN); @@ -848,7 +858,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - kernel_path_ao(kg, &sd, &L, &state, rng, throughput); + kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput); } #endif @@ -858,6 +868,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, if(sd.flag & SD_BSSRDF) { if(kernel_path_subsurface_scatter(kg, &sd, + &emission_sd, &L, &state, rng, @@ -871,7 +882,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #endif /* __SUBSURFACE__ */ /* direct lighting */ - kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L); + kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L); /* compute direct lighting and next bounce */ if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray)) diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 13ae4cf669b..b4dee220aa5 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ -ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput) +ccl_device void kernel_branched_path_ao(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + PathState *state, + RNG *rng, + float3 throughput) { int num_samples = kernel_data.integrator.ao_samples; float num_samples_inv 
= 1.0f/num_samples; @@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR light_ray.dP = ccl_fetch(sd, dP); light_ray.dD = differential3_zero(); - if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce); } } @@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR /* bounce off surface and integrate indirect light */ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, - RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust, - PathState *state, PathRadiance *L) + RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput, + float num_samples_adjust, PathState *state, PathRadiance *L) { for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { const ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; @@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba } kernel_path_indirect(kg, + emission_sd, rng, &bsdf_ray, tp*num_samples_inv, @@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba #ifdef __SUBSURFACE__ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, + ShaderData *emission_sd, PathRadiance *L, PathState *state, RNG *rng, @@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, kernel_volume_stack_update_for_subsurface( kg, + emission_sd, &volume_ray, hit_state.volume_stack); } @@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, kg, rng, &bssrdf_sd, + emission_sd, &hit_state, throughput, num_samples_inv, @@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, kg, rng, &bssrdf_sd, + emission_sd, throughput, 
num_samples_inv, &hit_state, @@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_radiance_init(&L, kernel_data.film.use_light_pass); + /* shader data memory used for both volumes and surfaces, saves stack space */ + ShaderData sd; + /* shader data used by emission, shadows, volume stacks */ + ShaderData emission_sd; + PathState state; - path_state_init(kg, &state, rng, sample, &ray); + path_state_init(kg, &emission_sd, &state, rng, sample, &ray); #ifdef __KERNEL_DEBUG__ DebugData debug_data; @@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; - ShaderData volume_sd; - shader_setup_from_volume(kg, &volume_sd, &volume_ray); + shader_setup_from_volume(kg, &sd, &volume_ray); kernel_volume_decoupled_record(kg, &state, - &volume_ray, &volume_sd, &volume_segment, heterogeneous); + &volume_ray, &sd, &volume_segment, heterogeneous); /* direct light sampling */ if(volume_segment.closure_flag & SD_SCATTER) { @@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in int all = kernel_data.integrator.sample_all_lights_direct; - kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, - throughput, &state, &L, all, &volume_ray, &volume_segment); + kernel_branched_path_volume_connect_light(kg, rng, &sd, + &emission_sd, throughput, &state, &L, all, + &volume_ray, &volume_segment); /* indirect light sampling */ int num_samples = kernel_data.integrator.volume_samples; @@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE); VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + &ps, &pray, &sd, &tp, rphase, rscatter, 
&volume_segment, NULL, false); (void)result; kernel_assert(result == VOLUME_PATH_SCATTERED); if(kernel_path_volume_bounce(kg, rng, - &volume_sd, + &sd, &tp, &ps, &L, &pray)) { kernel_path_indirect(kg, + &emission_sd, rng, &pray, tp*num_samples_inv, @@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in for(int j = 0; j < num_samples; j++) { PathState ps = state; Ray pray = ray; - ShaderData volume_sd; float3 tp = throughput * num_samples_inv; /* branch RNG state */ path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); + kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous); #ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* todo: support equiangular, MIS and all light sampling. * alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L); + kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L); if(kernel_path_volume_bounce(kg, rng, - &volume_sd, + &sd, &tp, &ps, &L, &pray)) { kernel_path_indirect(kg, + &emission_sd, rng, &pray, tp, @@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in } /* todo: avoid this calculation using decoupled ray marching */ - kernel_volume_shadow(kg, &state, &volume_ray, &throughput); + kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput); #endif } #endif @@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, &state, &ray); + float3 L_background = indirect_background(kg, &emission_sd, &state, &ray); path_radiance_accum_background(&L, throughput, L_background, state.bounce); #endif @@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals 
*kg, RNG *rng, in } /* setup shading */ - ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &ray); shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN); shader_merge_closures(&sd); @@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput); + kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput); } #endif #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object */ if(sd.flag & SD_BSSRDF) { - kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state, + kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state, rng, &ray, throughput); } #endif @@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(kernel_data.integrator.use_direct_light) { int all = kernel_data.integrator.sample_all_lights_direct; kernel_branched_path_surface_connect_light(kg, rng, - &sd, &hit_state, throughput, 1.0f, &L, all); + &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all); } #endif /* indirect light */ kernel_branched_path_surface_indirect_light(kg, rng, - &sd, throughput, 1.0f, &hit_state, &L); + &sd, &emission_sd, throughput, 1.0f, &hit_state, &L); /* continue in case of transparency */ throughput *= shader_bsdf_transparency(kg, &sd); diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index ef3765f7d89..e0e35d792ab 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -16,7 +16,12 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray) +ccl_device_inline void path_state_init(KernelGlobals *kg, + ShaderData 
*stack_sd, + ccl_addr_space PathState *state, + ccl_addr_space RNG *rng, + int sample, + ccl_addr_space Ray *ray) { state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP; @@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta if(kernel_data.integrator.use_volumes) { /* initialize volume stack with volume we are inside of */ - kernel_volume_stack_init(kg, ray, state->volume_stack); + kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack); /* seed RNG for cases where we can't use stratified samples */ state->rng_congruential = lcg_init(*rng + sample*0x51633e2d); } diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 1818c4fd2da..74b1ae0ca32 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN /* branched path tracing: connect path directly to position on one or more lights and add it to L */ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights) + ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput, + float num_samples_adjust, PathRadiance *L, int sample_all_lights) { #ifdef __EMISSION__ /* sample illumination from lights to find path contribution */ @@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal LightSample ls; lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls); - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ 
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); } @@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal LightSample ls; light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); } @@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); /* sample random light */ - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp); } @@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng, #ifndef __SPLIT_KERNEL__ /* path tracing: connect path directly to position on a light and add it to L */ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng, - ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L) + ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state, + 
PathRadiance *L) { #ifdef __EMISSION__ if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) @@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ LightSample ls; light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp); } diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index 9eb8b240b88..e45522a4641 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME_SCATTER__ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L) + ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L) { #ifdef __EMISSION__ if(!kernel_data.integrator.use_direct_light) @@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, if(ls.pdf == 0.0f) return; - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp); } @@ -106,7 +106,7 @@ bool 
kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, } ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L, + ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L, bool sample_all_lights, Ray *ray, const VolumeSegment *segment) { #ifdef __EMISSION__ @@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG if(ls.pdf == 0.0f) continue; - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); } @@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG if(ls.pdf == 0.0f) continue; - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); } @@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG return; /* sample random light */ - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ float3 shadow; - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + if(!shadow_blocked(kg, 
emission_sd, state, &light_ray, &shadow)) { /* accumulate */ path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp); } diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 504ac2e40bc..c8f6503cf58 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN #define STACK_MAX_HITS 64 -ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) +ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow) { *shadow = make_float3(1.0f, 1.0f, 1.0f); @@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * if(ps.volume_stack[0].shader != SHADER_NONE) { Ray segment_ray = *ray; segment_ray.t = isect->t; - kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); + kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput); } #endif /* setup shader data at surface */ - ShaderData sd; - shader_setup_from_ray(kg, &sd, isect, ray); + shader_setup_from_ray(kg, shadow_sd, isect, ray); /* attenuation from transparent surface */ - if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { + if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { path_state_modify_bounce(state, true); - shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); + shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); path_state_modify_bounce(state, false); - throughput *= shader_bsdf_transparency(kg, &sd); + throughput *= shader_bsdf_transparency(kg, shadow_sd); } /* stop if all light is blocked */ @@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * } /* move ray forward */ - ray->P = sd.P; + ray->P = shadow_sd->P; if(ray->t != FLT_MAX) ray->D = normalize_len(Pend - ray->P, &ray->t); #ifdef __VOLUME__ /* exit/enter volume */ - 
kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack); + kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack); #endif bounce++; @@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * #ifdef __VOLUME__ /* attenuation for last line segment towards light */ if(ps.volume_stack[0].shader != SHADER_NONE) - kernel_volume_shadow(kg, &ps, ray, &throughput); + kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); #endif *shadow = throughput; @@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * #ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* apply attenuation from current volume shader */ - kernel_volume_shadow(kg, state, ray, shadow); + kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } #endif @@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * * one extra ray cast for the cases where we do want transparency.
*/ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, + ShaderData *shadow_sd, ccl_addr_space PathState *state, ccl_addr_space Ray *ray_input, float3 *shadow) @@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, #ifdef __VOLUME__ /* attenuation for last line segment towards light */ if(ps.volume_stack[0].shader != SHADER_NONE) - kernel_volume_shadow(kg, &ps, ray, &throughput); + kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); #endif *shadow *= throughput; @@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, if(ps.volume_stack[0].shader != SHADER_NONE) { Ray segment_ray = *ray; segment_ray.t = isect->t; - kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); + kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput); } #endif /* setup shader data at surface */ -#ifdef __SPLIT_KERNEL__ - ShaderData *sd = kg->sd_input; -#else - ShaderData sd_object; - ShaderData *sd = &sd_object; -#endif - shader_setup_from_ray(kg, sd, isect, ray); + shader_setup_from_ray(kg, shadow_sd, isect, ray); /* attenuation from transparent surface */ - if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) { + if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) { path_state_modify_bounce(state, true); - shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); + shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); path_state_modify_bounce(state, false); - throughput *= shader_bsdf_transparency(kg, sd); + throughput *= shader_bsdf_transparency(kg, shadow_sd); } if(is_zero(throughput)) return true; /* move ray forward */ - ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng)); + ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); if(ray->t != FLT_MAX) ray->D = normalize_len(Pend - ray->P, &ray->t); #ifdef __VOLUME__ /* exit/enter volume */ - kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack); + 
kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack); #endif bounce++; @@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, #ifdef __VOLUME__ else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* apply attenuation from current volume shader */ - kernel_volume_shadow(kg, state, ray, shadow); + kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } #endif #endif diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 30a978f6c9e..e1ea60f372e 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* get the volume attenuation over line segment defined by ray, with the * assumption that there are no surfaces blocking light between the endpoints */ -ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput) +ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput) { - ShaderData sd; - shader_setup_from_volume(kg, &sd, ray); + shader_setup_from_volume(kg, shadow_sd, ray); if(volume_stack_is_heterogeneous(kg, state->volume_stack)) - kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput); + kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput); else - kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput); + kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput); } /* Equi-angular sampling as in: @@ -1000,6 +999,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou * is inside of. 
*/ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, + ShaderData *stack_sd, Ray *ray, VolumeStack *stack) { @@ -1040,28 +1040,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, qsort(hits, num_hits, sizeof(Intersection), intersections_compare); for(uint hit = 0; hit < num_hits; ++hit, ++isect) { - ShaderData sd; - shader_setup_from_ray(kg, &sd, isect, &volume_ray); - if(sd.flag & SD_BACKFACING) { + shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); + if(stack_sd->flag & SD_BACKFACING) { bool need_add = true; for(int i = 0; i < enclosed_index && need_add; ++i) { /* If ray exited the volume and never entered to that volume * it means that camera is inside such a volume. */ - if(enclosed_volumes[i] == sd.object) { + if(enclosed_volumes[i] == stack_sd->object) { need_add = false; } } for(int i = 0; i < stack_index && need_add; ++i) { /* Don't add intersections twice. */ - if(stack[i].object == sd.object) { + if(stack[i].object == stack_sd->object) { need_add = false; break; } } if(need_add) { - stack[stack_index].object = sd.object; - stack[stack_index].shader = sd.shader; + stack[stack_index].object = stack_sd->object; + stack[stack_index].shader = stack_sd->shader; ++stack_index; } } @@ -1069,7 +1068,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, /* If ray from camera enters the volume, this volume shouldn't * be added to the stack on exit. */ - enclosed_volumes[enclosed_index++] = sd.object; + enclosed_volumes[enclosed_index++] = stack_sd->object; } } } @@ -1086,9 +1085,8 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, break; } - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &volume_ray); - if(sd.flag & SD_BACKFACING) { + shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); + if(stack_sd->flag & SD_BACKFACING) { /* If ray exited the volume and never entered to that volume * it means that camera is inside such a volume. 
*/ @@ -1097,20 +1095,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, /* If ray exited the volume and never entered to that volume * it means that camera is inside such a volume. */ - if(enclosed_volumes[i] == sd.object) { + if(enclosed_volumes[i] == stack_sd->object) { need_add = false; } } for(int i = 0; i < stack_index && need_add; ++i) { /* Don't add intersections twice. */ - if(stack[i].object == sd.object) { + if(stack[i].object == stack_sd->object) { need_add = false; break; } } if(need_add) { - stack[stack_index].object = sd.object; - stack[stack_index].shader = sd.shader; + stack[stack_index].object = stack_sd->object; + stack[stack_index].shader = stack_sd->shader; ++stack_index; } } @@ -1118,11 +1116,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, /* If ray from camera enters the volume, this volume shouldn't * be added to the stack on exit. */ - enclosed_volumes[enclosed_index++] = sd.object; + enclosed_volumes[enclosed_index++] = stack_sd->object; } /* Move ray forward. 
*/ - volume_ray.P = ray_offset(sd.P, -sd.Ng); + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); ++step; } #endif @@ -1190,6 +1188,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd #ifdef __SUBSURFACE__ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, + ShaderData *stack_sd, Ray *ray, VolumeStack *stack) { @@ -1210,9 +1209,8 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, qsort(hits, num_hits, sizeof(Intersection), intersections_compare); for(uint hit = 0; hit < num_hits; ++hit, ++isect) { - ShaderData sd; - shader_setup_from_ray(kg, &sd, isect, &volume_ray); - kernel_volume_stack_enter_exit(kg, &sd, stack); + shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); + kernel_volume_stack_enter_exit(kg, stack_sd, stack); } } # else @@ -1224,13 +1222,12 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, &isect, PATH_RAY_ALL_VISIBILITY)) { - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &volume_ray); - kernel_volume_stack_enter_exit(kg, &sd, stack); + shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); + kernel_volume_stack_enter_exit(kg, stack_sd, stack); /* Move ray forward. 
*/ - volume_ray.P = ray_offset(sd.P, -sd.Ng); - volume_ray.t -= sd.ray_length; + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + volume_ray.t -= stack_sd->ray_length; ++step; } # endif diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h index 3d12a3dd993..f42d0a985bb 100644 --- a/intern/cycles/kernel/split/kernel_background_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h @@ -157,7 +157,7 @@ ccl_device char kernel_background_buffer_update( if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, state, ray); + float3 L_background = indirect_background(kg, kg->sd_input, state, ray); path_radiance_accum_background(L, (*throughput), L_background, state->bounce); #endif ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); @@ -226,7 +226,7 @@ ccl_device char kernel_background_buffer_update( *throughput = make_float3(1.0f, 1.0f, 1.0f); *L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, state, rng, sample, ray); + path_state_init(kg, kg->sd_input, state, rng, sample, ray); #ifdef __KERNEL_DEBUG__ debug_data_init(debug_data); #endif diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 9891391a3a3..e3dbc43757e 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -207,6 +207,7 @@ ccl_device void kernel_data_init( L_transparent_coop[ray_index] = 0.0f; path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass); path_state_init(kg, + kg->sd_input, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index c7a2aa6426c..ebe91097496 
100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -88,7 +88,7 @@ ccl_device char kernel_direct_lighting( BsdfEval L_light; bool is_lamp; - if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { + if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) { /* Write intermediate data to global memory to access from * the next kernel. */ diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index dc3b4b34d4e..3bd0e361078 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -74,7 +74,7 @@ ccl_device void kernel_lamp_emission( /* intersect with lamp */ float3 emission; - if(indirect_lamp_emission(kg, state, &light_ray, &emission)) { + if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) { path_radiance_accum_emission(L, throughput, emission, state->bounce); } } diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h index 0c989861eef..6153af47f96 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -71,6 +71,7 @@ ccl_device void kernel_shadow_blocked( float3 shadow; update_path_radiance = !(shadow_blocked(kg, + kg->sd_input, state, light_ray_global, &shadow)); |