diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-22 13:00:29 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-25 11:01:22 +0300 |
commit | 8bca34fe326d10cc2f20df7fa541179e9ba835d2 (patch) | |
tree | aeab22e5e0ec3d4ee1a5fe8c37daee0be4a89bee /intern/cycles/kernel/kernel_path.h | |
parent | e6fff424dbcd02c3fed25036a7feb7f59d427843 (diff) |
Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the
old logic of having separate arrays for just intersections and shader data and
encapsulates all the data needed for SSS evaluation.
This gives a huge stack memory saving on GPU. In my own experiments it gave a 25%
memory usage reduction on GTX560Ti (722MB vs. 946MB).
Unfortunately, this gave some performance loss of 20% which only happens on GPU.
This is perhaps due to a different memory access pattern. Will be solved in the
future, hopefully.
Famous saying: won in memory - lost in time (which is also valid the other way
around).
Diffstat (limited to 'intern/cycles/kernel/kernel_path.h')
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 27 |
1 files changed, 22 insertions, 5 deletions
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 9794ad1d180..87d36efa4d4 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -338,10 +338,16 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd if(sc) { uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); - ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + SubsurfaceIntersection ss_isect; float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); + int num_hits = subsurface_scatter_multi_intersect(kg, + &ss_isect, + sd, + sc, + &lcg_state, + bssrdf_u, bssrdf_v, + false); #ifdef __VOLUME__ Ray volume_ray = *ray; bool need_update_volume_stack = kernel_data.integrator.use_volumes && @@ -350,15 +356,26 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd /* compute lighting with the BSDF closure */ for(int hit = 0; hit < num_hits; hit++) { + /* NOTE: We reuse the existing ShaderData, we assume the path + * integration loop stops when this function returns true. + */ + subsurface_scatter_multi_setup(kg, + &ss_isect, + hit, + sd, + state->flag, + sc, + false); + float3 tp = *throughput; PathState hit_state = *state; Ray hit_ray = *ray; hit_state.rng_offset += PRNG_BOUNCE_NUM; - - kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L); - if(kernel_path_surface_bounce(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) { + kernel_path_surface_connect_light(kg, rng, sd, tp, state, L); + + if(kernel_path_surface_bounce(kg, rng, sd, &tp, &hit_state, L, &hit_ray)) { #ifdef __LAMP_MIS__ hit_state.ray_t = 0.0f; #endif |