Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Brecht Van Lommel <brechtvanlommel@gmail.com> 2016-05-22 23:35:47 +0300
committer Brecht Van Lommel <brechtvanlommel@gmail.com> 2016-05-23 23:29:24 +0300
commit 999d5a67852b5958b9361c9888734ebc889e4a22 (patch)
tree 5f3c5ad0409c77fc6ae3486420b3888fa1e2fea8 /intern/cycles/kernel/kernel_shadow.h
parent af4a04eae07184f7437a8c51858a4ddb8a2e3e4c (diff)
Cycles CUDA: reduce stack memory by reusing ShaderData.
57% less for path and 48% less for branched path.
Diffstat (limited to 'intern/cycles/kernel/kernel_shadow.h')
-rw-r--r-- intern/cycles/kernel/kernel_shadow.h 46
1 files changed, 20 insertions, 26 deletions
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 504ac2e40bc..c8f6503cf58 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
#define STACK_MAX_HITS 64
-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
@@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
- kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, isect, ray);
+ shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+ if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
- throughput *= shader_bsdf_transparency(kg, &sd);
+ throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
/* stop if all light is blocked */
@@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
}
/* move ray forward */
- ray->P = sd.P;
+ ray->P = shadow_sd->P;
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
- kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+ kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
- kernel_volume_shadow(kg, &ps, ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow = throughput;
@@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
- kernel_volume_shadow(kg, state, ray, shadow);
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
@@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
* one extra ray cast for the cases were we do want transparency. */
ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
+ ShaderData *shadow_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input,
float3 *shadow)
@@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
- kernel_volume_shadow(kg, &ps, ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow *= throughput;
@@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
- kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+ kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
-#ifdef __SPLIT_KERNEL__
- ShaderData *sd = kg->sd_input;
-#else
- ShaderData sd_object;
- ShaderData *sd = &sd_object;
-#endif
- shader_setup_from_ray(kg, sd, isect, ray);
+ shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
- if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
+ if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
- throughput *= shader_bsdf_transparency(kg, sd);
+ throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
if(is_zero(throughput))
return true;
/* move ray forward */
- ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
+ ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
- kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
+ kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
- kernel_volume_shadow(kg, state, ray, shadow);
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
#endif