Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2016-05-22 23:35:47 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2016-05-23 23:29:24 +0300
commit999d5a67852b5958b9361c9888734ebc889e4a22 (patch)
tree5f3c5ad0409c77fc6ae3486420b3888fa1e2fea8 /intern/cycles/kernel/kernel_path_branched.h
parentaf4a04eae07184f7437a8c51858a4ddb8a2e3e4c (diff)
Cycles CUDA: reduce stack memory by reusing ShaderData.
57% less for path and 48% less for branched path.
Diffstat (limited to 'intern/cycles/kernel/kernel_path_branched.h')
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h62
1 files changed, 39 insertions, 23 deletions
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 13ae4cf669b..b4dee220aa5 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ float3 throughput)
{
int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples;
@@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
- if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
}
}
@@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
- RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
- PathState *state, PathRadiance *L)
+ RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
+ float num_samples_adjust, PathState *state, PathRadiance *L)
{
for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
}
kernel_path_indirect(kg,
+ emission_sd,
rng,
&bsdf_ray,
tp*num_samples_inv,
@@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
#ifdef __SUBSURFACE__
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kernel_volume_stack_update_for_subsurface(
kg,
+ emission_sd,
&volume_ray,
hit_state.volume_stack);
}
@@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
+ emission_sd,
&hit_state,
throughput,
num_samples_inv,
@@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
+ emission_sd,
throughput,
num_samples_inv,
&hit_state,
@@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_init(&L, kernel_data.film.use_light_pass);
+ /* shader data memory used for both volumes and surfaces, saves stack space */
+ ShaderData sd;
+ /* shader data used by emission, shadows, volume stacks */
+ ShaderData emission_sd;
+
PathState state;
- path_state_init(kg, &state, rng, sample, &ray);
+ path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
@@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
- ShaderData volume_sd;
- shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+ shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state,
- &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+ &volume_ray, &sd, &volume_segment, heterogeneous);
/* direct light sampling */
if(volume_segment.closure_flag & SD_SCATTER) {
@@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
int all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
- throughput, &state, &L, all, &volume_ray, &volume_segment);
+ kernel_branched_path_volume_connect_light(kg, rng, &sd,
+ &emission_sd, throughput, &state, &L, all,
+ &volume_ray, &volume_segment);
/* indirect light sampling */
int num_samples = kernel_data.integrator.volume_samples;
@@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+ &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
(void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED);
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
+ &emission_sd,
rng,
&pray,
tp*num_samples_inv,
@@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
for(int j = 0; j < num_samples; j++) {
PathState ps = state;
Ray pray = ray;
- ShaderData volume_sd;
float3 tp = throughput * num_samples_inv;
/* branch RNG state */
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+ kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+ kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
if(kernel_path_volume_bounce(kg,
rng,
- &volume_sd,
+ &sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
+ &emission_sd,
rng,
&pray,
tp,
@@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* todo: avoid this calculation using decoupled ray marching */
- kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+ kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
#endif
}
#endif
@@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &state, &ray);
+ float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* setup shading */
- ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(&sd);
@@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
+ kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
#endif
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) {
- kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
+ kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
rng, &ray, throughput);
}
#endif
@@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng,
- &sd, &hit_state, throughput, 1.0f, &L, all);
+ &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
- &sd, throughput, 1.0f, &hit_state, &L);
+ &sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
/* continue in case of transparency */
throughput *= shader_bsdf_transparency(kg, &sd);