Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2016-05-22 23:35:47 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2016-05-23 23:29:24 +0300
commit999d5a67852b5958b9361c9888734ebc889e4a22 (patch)
tree5f3c5ad0409c77fc6ae3486420b3888fa1e2fea8 /intern
parentaf4a04eae07184f7437a8c51858a4ddb8a2e3e4c (diff)
Cycles CUDA: reduce stack memory by reusing ShaderData.
57% less for path and 48% less for branched path.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/kernel_bake.h21
-rw-r--r--intern/cycles/kernel/kernel_emission.h41
-rw-r--r--intern/cycles/kernel/kernel_path.h77
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h62
-rw-r--r--intern/cycles/kernel/kernel_path_state.h9
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h22
-rw-r--r--intern/cycles/kernel/kernel_path_volume.h20
-rw-r--r--intern/cycles/kernel/kernel_shadow.h46
-rw-r--r--intern/cycles/kernel/kernel_volume.h57
-rw-r--r--intern/cycles/kernel/split/kernel_background_buffer_update.h4
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h1
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h2
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked.h1
14 files changed, 196 insertions, 169 deletions
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 392cff9c281..77982ee548a 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ /* emission shader data memory used by various functions */
+ ShaderData emission_sd;
+
ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng;
ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
/* init path state */
- path_state_init(kg, &state, &rng, sample, NULL);
+ path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
/* evaluate surface shader */
float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
kernel_path_subsurface_init_indirect(&ss_indirect);
if(kernel_path_subsurface_scatter(kg,
sd,
+ &emission_sd,
&L_sample,
&state,
&rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&L_sample,
&throughput);
kernel_path_indirect(kg,
+ &emission_sd,
&rng,
&ray,
throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample light and BSDF */
if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
- kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+ kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
state.ray_t = 0.0f;
#endif
/* compute indirect light */
- kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
+ kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
- kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */
if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+ kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
}
#endif
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, &rng,
- sd, &state, throughput, 1.0f, &L_sample, all);
+ sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, &rng,
- sd, throughput, 1.0f, &state, &L_sample);
+ sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
}
}
#endif
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 5cf52f9d176..4de8e0f698a 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+ ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
float time)
{
/* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
- ShaderData *sd = kg->sd_input;
-#else
- ShaderData sd_object;
- ShaderData *sd = &sd_object;
-#endif
float3 eval;
#ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.dP = differential3_zero();
ray.dD = dI;
- shader_setup_from_background(kg, sd, &ray);
+ shader_setup_from_background(kg, emission_sd, &ray);
path_state_modify_bounce(state, true);
- eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
+ eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
}
else
#endif
{
- shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
+ shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
- ls->Ng = ccl_fetch(sd, Ng);
+ ls->Ng = ccl_fetch(emission_sd, Ng);
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+ shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
/* evaluate emissive closure */
- if(ccl_fetch(sd, flag) & SD_EMISSION)
- eval = shader_emissive_eval(kg, sd);
+ if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+ eval = shader_emissive_eval(kg, emission_sd);
else
eval = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
/* evaluate closure */
float3 light_eval = direct_emissive_eval(kg,
+ emission_sd,
ls,
state,
-ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+ ShaderData *emission_sd,
ccl_addr_space PathState *state,
Ray *ray,
float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
#endif
float3 L = direct_emissive_eval(kg,
+ emission_sd,
&ls,
state,
-ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
Ray volume_ray = *ray;
volume_ray.t = ls.t;
float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
- kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
L *= volume_tp;
}
#endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
/* Indirect Background */
ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+ ShaderData *emission_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray)
{
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
/* evaluate background closure */
# ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
- shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
- path_state_modify_bounce(state, true);
- float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
- path_state_modify_bounce(state, false);
+ shader_setup_from_background(kg, emission_sd, &priv_ray);
# else
- ShaderData sd;
- shader_setup_from_background(kg, &sd, ray);
+ shader_setup_from_background(kg, emission_sd, ray);
+# endif
path_state_modify_bounce(state, true);
- float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
+ float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
-# endif
#ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index c136c85df59..5527d8aa861 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -53,6 +53,7 @@
CCL_NAMESPACE_BEGIN
ccl_device void kernel_path_indirect(KernelGlobals *kg,
+ ShaderData *emission_sd,
RNG *rng,
Ray *ray,
float3 throughput,
@@ -60,6 +61,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
PathState *state,
PathRadiance *L)
{
+ /* shader data memory used for both volumes and surfaces, saves stack space */
+ ShaderData sd;
+
/* path iteration */
for(;;) {
/* intersect scene */
@@ -87,7 +91,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* intersect with lamp */
float3 emission;
- if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+ if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
path_radiance_accum_emission(L,
throughput,
emission,
@@ -115,15 +119,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
- ShaderData volume_sd;
shader_setup_from_volume(kg,
- &volume_sd,
+ &sd,
&volume_ray);
kernel_volume_decoupled_record(kg,
state,
&volume_ray,
- &volume_sd,
+ &sd,
&volume_segment,
heterogeneous);
@@ -146,7 +149,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg,
rng,
- &volume_sd,
+ &sd,
+ emission_sd,
throughput,
state,
L,
@@ -163,7 +167,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
result = kernel_volume_decoupled_scatter(kg,
state,
&volume_ray,
- &volume_sd,
+ &sd,
&throughput,
rphase,
rscatter,
@@ -178,7 +182,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&throughput,
state,
L,
@@ -198,16 +202,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# endif
{
/* integrate along volume segment with distance sampling */
- ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+ kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg,
rng,
- &volume_sd,
+ &sd,
+ emission_sd,
throughput,
state,
L);
@@ -215,7 +219,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* indirect light bounce */
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&throughput,
state,
L,
@@ -235,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(!hit) {
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, state, ray);
+ float3 L_background = indirect_background(kg, emission_sd, state, ray);
path_radiance_accum_background(L,
throughput,
L_background,
@@ -246,7 +250,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
/* setup shading */
- ShaderData sd;
shader_setup_from_ray(kg,
&sd,
&isect,
@@ -328,7 +331,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
light_ray.dP = sd.dP;
light_ray.dD = differential3_zero();
- if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
path_radiance_accum_ao(L,
throughput,
ao_alpha,
@@ -378,6 +381,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
kernel_branched_path_surface_connect_light(kg,
rng,
&sd,
+ emission_sd,
state,
throughput,
1.0f,
@@ -393,6 +397,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@@ -425,7 +430,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
- if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
}
}
@@ -435,6 +440,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ccl_device bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@@ -503,7 +509,7 @@ ccl_device bool kernel_path_subsurface_scatter(
hit_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(hit_L, L);
- kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
+ kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg,
rng,
@@ -526,6 +532,7 @@ ccl_device bool kernel_path_subsurface_scatter(
kernel_volume_stack_update_for_subsurface(
kg,
+ emission_sd,
&volume_ray,
hit_state->volume_stack);
}
@@ -604,8 +611,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
path_radiance_init(&L, kernel_data.film.use_light_pass);
+ /* shader data memory used for both volumes and surfaces, saves stack space */
+ ShaderData sd;
+ /* shader data used by emission, shadows, volume stacks */
+ ShaderData emission_sd;
+
PathState state;
- path_state_init(kg, &state, rng, sample, &ray);
+ path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
@@ -669,7 +681,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
/* intersect with lamp */
float3 emission;
- if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
+ if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
#endif
@@ -689,11 +701,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
- ShaderData volume_sd;
- shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+ shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state,
- &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+ &volume_ray, &sd, &volume_segment, heterogeneous);
volume_segment.sampling_method = sampling_method;
@@ -708,8 +719,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
int all = false;
/* direct light sampling */
- kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
- throughput, &state, &L, all, &volume_ray, &volume_segment);
+ kernel_branched_path_volume_connect_light(kg, rng, &sd,
+ &emission_sd, throughput, &state, &L, all,
+ &volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
@@ -718,7 +730,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
- &state, &volume_ray, &volume_sd, &throughput,
+ &state, &volume_ray, &sd, &throughput,
rphase, rscatter, &volume_segment, NULL, true);
}
@@ -726,7 +738,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue;
else
break;
@@ -739,17 +751,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
# endif
{
/* integrate along volume segment with distance sampling */
- ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+ kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
+ kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* indirect light bounce */
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue;
else
break;
@@ -772,7 +783,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &state, &ray);
+ float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@@ -780,7 +791,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
}
/* setup shading */
- ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@@ -848,7 +858,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
+ kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
#endif
@@ -858,6 +868,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(sd.flag & SD_BSSRDF) {
if(kernel_path_subsurface_scatter(kg,
&sd,
+ &emission_sd,
&L,
&state,
rng,
@@ -871,7 +882,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#endif /* __SUBSURFACE__ */
/* direct lighting */
- kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
+ kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* compute direct lighting and next bounce */
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 13ae4cf669b..b4dee220aa5 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ float3 throughput)
{
int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples;
@@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
- if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
}
}
@@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
- RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
- PathState *state, PathRadiance *L)
+ RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
+ float num_samples_adjust, PathState *state, PathRadiance *L)
{
for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
}
kernel_path_indirect(kg,
+ emission_sd,
rng,
&bsdf_ray,
tp*num_samples_inv,
@@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
#ifdef __SUBSURFACE__
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kernel_volume_stack_update_for_subsurface(
kg,
+ emission_sd,
&volume_ray,
hit_state.volume_stack);
}
@@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
+ emission_sd,
&hit_state,
throughput,
num_samples_inv,
@@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
+ emission_sd,
throughput,
num_samples_inv,
&hit_state,
@@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_init(&L, kernel_data.film.use_light_pass);
+ /* shader data memory used for both volumes and surfaces, saves stack space */
+ ShaderData sd;
+ /* shader data used by emission, shadows, volume stacks */
+ ShaderData emission_sd;
+
PathState state;
- path_state_init(kg, &state, rng, sample, &ray);
+ path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
@@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
- ShaderData volume_sd;
- shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+ shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state,
- &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+ &volume_ray, &sd, &volume_segment, heterogeneous);
/* direct light sampling */
if(volume_segment.closure_flag & SD_SCATTER) {
@@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
int all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
- throughput, &state, &L, all, &volume_ray, &volume_segment);
+ kernel_branched_path_volume_connect_light(kg, rng, &sd,
+ &emission_sd, throughput, &state, &L, all,
+ &volume_ray, &volume_segment);
/* indirect light sampling */
int num_samples = kernel_data.integrator.volume_samples;
@@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+ &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
(void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED);
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
+ &emission_sd,
rng,
&pray,
tp*num_samples_inv,
@@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
for(int j = 0; j < num_samples; j++) {
PathState ps = state;
Ray pray = ray;
- ShaderData volume_sd;
float3 tp = throughput * num_samples_inv;
/* branch RNG state */
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+ kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+ kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
+ &emission_sd,
rng,
&pray,
tp,
@@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* todo: avoid this calculation using decoupled ray marching */
- kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+ kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
#endif
}
#endif
@@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &state, &ray);
+ float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* setup shading */
- ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(&sd);
@@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
+ kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
#endif
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) {
- kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
+ kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
rng, &ray, throughput);
}
#endif
@@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng,
- &sd, &hit_state, throughput, 1.0f, &L, all);
+ &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
- &sd, throughput, 1.0f, &hit_state, &L);
+ &sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
/* continue in case of transparency */
throughput *= shader_bsdf_transparency(kg, &sd);
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index ef3765f7d89..e0e35d792ab 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -16,7 +16,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray)
+ccl_device_inline void path_state_init(KernelGlobals *kg,
+ ShaderData *stack_sd,
+ ccl_addr_space PathState *state,
+ ccl_addr_space RNG *rng,
+ int sample,
+ ccl_addr_space Ray *ray)
{
state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
@@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta
if(kernel_data.integrator.use_volumes) {
/* initialize volume stack with volume we are inside of */
- kernel_volume_stack_init(kg, ray, state->volume_stack);
+ kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack);
/* seed RNG for cases where we can't use stratified samples */
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
}
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 1818c4fd2da..74b1ae0ca32 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights)
+ ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput,
+ float num_samples_adjust, PathRadiance *L, int sample_all_lights)
{
#ifdef __EMISSION__
/* sample illumination from lights to find path contribution */
@@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls;
lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
/* sample random light */
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
}
@@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
#ifndef __SPLIT_KERNEL__
/* path tracing: connect path directly to position on a light and add it to L */
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
- ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L)
+ ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
+ PathRadiance *L)
{
#ifdef __EMISSION__
if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
@@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 9eb8b240b88..e45522a4641 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME_SCATTER__
ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+ ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
{
#ifdef __EMISSION__
if(!kernel_data.integrator.use_direct_light)
@@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
if(ls.pdf == 0.0f)
return;
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}
@@ -106,7 +106,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
}
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+ ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
{
#ifdef __EMISSION__
@@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f)
continue;
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f)
continue;
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
return;
/* sample random light */
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 504ac2e40bc..c8f6503cf58 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
#define STACK_MAX_HITS 64
-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
@@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
- kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, isect, ray);
+ shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+ if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
- throughput *= shader_bsdf_transparency(kg, &sd);
+ throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
/* stop if all light is blocked */
@@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
}
/* move ray forward */
- ray->P = sd.P;
+ ray->P = shadow_sd->P;
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
- kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+ kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
- kernel_volume_shadow(kg, &ps, ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow = throughput;
@@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
- kernel_volume_shadow(kg, state, ray, shadow);
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
@@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
* one extra ray cast for the cases were we do want transparency. */
ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
+ ShaderData *shadow_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input,
float3 *shadow)
@@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
- kernel_volume_shadow(kg, &ps, ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow *= throughput;
@@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
- kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
-#ifdef __SPLIT_KERNEL__
- ShaderData *sd = kg->sd_input;
-#else
- ShaderData sd_object;
- ShaderData *sd = &sd_object;
-#endif
- shader_setup_from_ray(kg, sd, isect, ray);
+ shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
- if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
+ if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
- throughput *= shader_bsdf_transparency(kg, sd);
+ throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
if(is_zero(throughput))
return true;
/* move ray forward */
- ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
+ ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
- kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
+ kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
- kernel_volume_shadow(kg, state, ray, shadow);
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
#endif
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 30a978f6c9e..e1ea60f372e 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* get the volume attenuation over line segment defined by ray, with the
* assumption that there are no surfaces blocking light between the endpoints */
-ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
+ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
{
- ShaderData sd;
- shader_setup_from_volume(kg, &sd, ray);
+ shader_setup_from_volume(kg, shadow_sd, ray);
if(volume_stack_is_heterogeneous(kg, state->volume_stack))
- kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
+ kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
else
- kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
+ kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
/* Equi-angular sampling as in:
@@ -1000,6 +999,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
* is inside of. */
ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
+ ShaderData *stack_sd,
Ray *ray,
VolumeStack *stack)
{
@@ -1040,28 +1040,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, isect, &volume_ray);
- if(sd.flag & SD_BACKFACING) {
+ shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+ if(stack_sd->flag & SD_BACKFACING) {
bool need_add = true;
for(int i = 0; i < enclosed_index && need_add; ++i) {
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
- if(enclosed_volumes[i] == sd.object) {
+ if(enclosed_volumes[i] == stack_sd->object) {
need_add = false;
}
}
for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
- if(stack[i].object == sd.object) {
+ if(stack[i].object == stack_sd->object) {
need_add = false;
break;
}
}
if(need_add) {
- stack[stack_index].object = sd.object;
- stack[stack_index].shader = sd.shader;
+ stack[stack_index].object = stack_sd->object;
+ stack[stack_index].shader = stack_sd->shader;
++stack_index;
}
}
@@ -1069,7 +1068,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit.
*/
- enclosed_volumes[enclosed_index++] = sd.object;
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
}
}
}
@@ -1086,9 +1085,8 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
break;
}
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
- if(sd.flag & SD_BACKFACING) {
+ shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+ if(stack_sd->flag & SD_BACKFACING) {
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
@@ -1097,20 +1095,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
- if(enclosed_volumes[i] == sd.object) {
+ if(enclosed_volumes[i] == stack_sd->object) {
need_add = false;
}
}
for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
- if(stack[i].object == sd.object) {
+ if(stack[i].object == stack_sd->object) {
need_add = false;
break;
}
}
if(need_add) {
- stack[stack_index].object = sd.object;
- stack[stack_index].shader = sd.shader;
+ stack[stack_index].object = stack_sd->object;
+ stack[stack_index].shader = stack_sd->shader;
++stack_index;
}
}
@@ -1118,11 +1116,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit.
*/
- enclosed_volumes[enclosed_index++] = sd.object;
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
}
/* Move ray forward. */
- volume_ray.P = ray_offset(sd.P, -sd.Ng);
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
++step;
}
#endif
@@ -1190,6 +1188,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
#ifdef __SUBSURFACE__
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
+ ShaderData *stack_sd,
Ray *ray,
VolumeStack *stack)
{
@@ -1210,9 +1209,8 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, isect, &volume_ray);
- kernel_volume_stack_enter_exit(kg, &sd, stack);
+ shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+ kernel_volume_stack_enter_exit(kg, stack_sd, stack);
}
}
# else
@@ -1224,13 +1222,12 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
&isect,
PATH_RAY_ALL_VISIBILITY))
{
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
- kernel_volume_stack_enter_exit(kg, &sd, stack);
+ shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+ kernel_volume_stack_enter_exit(kg, stack_sd, stack);
/* Move ray forward. */
- volume_ray.P = ray_offset(sd.P, -sd.Ng);
- volume_ray.t -= sd.ray_length;
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+ volume_ray.t -= stack_sd->ray_length;
++step;
}
# endif
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 3d12a3dd993..f42d0a985bb 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -157,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, state, ray);
+ float3 L_background = indirect_background(kg, kg->sd_input, state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
@@ -226,7 +226,7 @@ ccl_device char kernel_background_buffer_update(
*throughput = make_float3(1.0f, 1.0f, 1.0f);
*L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg, state, rng, sample, ray);
+ path_state_init(kg, kg->sd_input, state, rng, sample, ray);
#ifdef __KERNEL_DEBUG__
debug_data_init(debug_data);
#endif
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 9891391a3a3..e3dbc43757e 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -207,6 +207,7 @@ ccl_device void kernel_data_init(
L_transparent_coop[ray_index] = 0.0f;
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg,
+ kg->sd_input,
&PathState_coop[ray_index],
&rng_coop[ray_index],
my_sample,
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index c7a2aa6426c..ebe91097496 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -88,7 +88,7 @@ ccl_device char kernel_direct_lighting(
BsdfEval L_light;
bool is_lamp;
- if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+ if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* Write intermediate data to global memory to access from
* the next kernel.
*/
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index dc3b4b34d4e..3bd0e361078 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -74,7 +74,7 @@ ccl_device void kernel_lamp_emission(
/* intersect with lamp */
float3 emission;
- if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+ if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) {
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h
index 0c989861eef..6153af47f96 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h
@@ -71,6 +71,7 @@ ccl_device void kernel_shadow_blocked(
float3 shadow;
update_path_radiance = !(shadow_blocked(kg,
+ kg->sd_input,
state,
light_ray_global,
&shadow));