diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-28 17:30:35 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-28 18:07:34 +0300 |
commit | 1e43f0d74216cc936e6a708be321ba2c05b66ca1 (patch) | |
tree | 203cfcc239df6543980580c12c70773073e37f0d /intern | |
parent | 8919ed3a62137259f5c94d7ebd7cfdce452371b8 (diff) |
Cycles: Set of fixes for delayed SSS ray tracing
There were multiple issues which are solved now:
- It was possible that ray wouldn't be bounced off the BSSRDF, for example
when PDF or shader eval is zero. In this case PathState might have been
left in a pre-bounced state, which would have given incorrect shading
results.
This is solved by having separate PathState for each of the hits.
- Path radiance summing wasn't happening correctly either: indirect rays
were using wrong path radiance in the case when there were more than
one hit recorded.
This now uses a slightly trickier state machine which calculates path
radiance for just SSS (both direct and indirect) and then sums it back
to the final radiance.
- Previous commit wasn't totally correct either and was an induced bug
due to wrong path state left from the "un-happened" ray bounce.
There should be no special case happening here, BSSRDFs will be replaced
with diffuse ones due to PATH_RAY_DIFFUSE_ANCESTOR flag.
- Merged back codebases for "delayed" and "immediate" indirect SSS ray
tracing, hopefully making it easier to maintain the codebase.
Sure, this change brings memory usage back up by about 4-5%, but overall
it's still about a 2x memory reduction for the experimental kernel here.
Thanks Brecht for the review!
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 115 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 6 |
3 files changed, 55 insertions, 74 deletions
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 57cbf0b63db..a04e759f6eb 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -65,6 +65,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ SubsurfaceIndirectRays ss_indirect; + ss_indirect.tracing = false; ss_indirect.num_rays = 0; if(kernel_path_subsurface_scatter(kg, sd, @@ -75,14 +76,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian &throughput, &ss_indirect)) { -# ifdef __SUBSURFACE_DELAYED_INDIRECT__ while(ss_indirect.num_rays) { kernel_path_subsurface_setup_indirect(kg, &ss_indirect, - &L_sample, - &state, &ray, + &state, &ray, + &L_sample, &throughput); kernel_path_indirect(kg, &rng, @@ -91,8 +91,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian state.num_samples, &state, &L_sample); + kernel_path_subsurface_accum_indirect(&ss_indirect, &L_sample); } -# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */ is_sss_sample = true; } } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 850bfb22b3c..721e0fc32d5 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -448,21 +448,13 @@ ccl_device bool kernel_path_subsurface_scatter( /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { - uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); + /* We should never have two consecutive BSSRDF bounces, + * the second one should be converted to a diffuse BSDF to + * avoid this. + */ + kernel_assert(!ss_indirect->tracing); - /* If indirect ray hits BSSRDF we replace it with diffuse BSDF. 
*/ - if(ss_indirect->num_rays) { - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - subsurface_scatter_step(kg, - sd, - state->flag, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - false); - return false; - } + uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); SubsurfaceIntersection ss_isect; float bssrdf_u, bssrdf_v; @@ -493,9 +485,10 @@ ccl_device bool kernel_path_subsurface_scatter( sc, false); - PathState *hit_state = &ss_indirect->state; + PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays]; Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays]; float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays]; + PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays]; *hit_state = *state; *hit_ray = *ray; @@ -503,51 +496,25 @@ ccl_device bool kernel_path_subsurface_scatter( hit_state->rng_offset += PRNG_BOUNCE_NUM; - kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, L); + path_radiance_init(hit_L, kernel_data.film.use_light_pass); + kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L); if(kernel_path_surface_bounce(kg, rng, sd, hit_tp, hit_state, - L, + hit_L, hit_ray)) { #ifdef __LAMP_MIS__ hit_state->ray_t = 0.0f; #endif -#ifdef __SUBSURFACE_DELAYED_INDIRECT__ ss_indirect->num_rays++; -#else -# ifdef __VOLUME__ - if(ss_indirect->need_update_volume_stack) { - Ray volume_ray = *ray; - - /* Setup ray from previous surface point to the new one. */ - volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, - &volume_ray.t); - - kernel_volume_stack_update_for_subsurface(kg, - &volume_ray, - hit_state->volume_stack); - } -# endif /* __VOLUME__ */ - - kernel_path_indirect(kg, - rng, - hit_ray, - *hit_tp, - hit_state->num_samples, - hit_state, - L); - - /* For render passes, sum and reset indirect light pass variables - * for the next samples. 
- */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); -#endif + } + else { + path_radiance_accum_sample(L, hit_L, 1); } } return true; @@ -555,23 +522,38 @@ ccl_device bool kernel_path_subsurface_scatter( return false; } -#ifdef __SUBSURFACE_DELAYED_INDIRECT__ +ccl_device void kernel_path_subsurface_accum_indirect( + SubsurfaceIndirectRays *ss_indirect, + PathRadiance *L) +{ + if(ss_indirect->tracing) { + path_radiance_sum_indirect(L); + path_radiance_accum_sample(&ss_indirect->direct_L, L, 1); + if(ss_indirect->num_rays == 0) { + *L = ss_indirect->direct_L; + } + } +} + ccl_device void kernel_path_subsurface_setup_indirect( KernelGlobals *kg, SubsurfaceIndirectRays *ss_indirect, - PathRadiance *L, + const Ray *orig_ray, PathState *state, - Ray *orig_ray, Ray *ray, + PathRadiance *L, float3 *throughput) { + if(!ss_indirect->tracing) { + ss_indirect->direct_L = *L; + } + ss_indirect->tracing = true; + /* Setup state, ray and throughput for indirect SSS rays. */ ss_indirect->num_rays--; Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays]; - - *state = ss_indirect->state; - *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; + PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays]; #ifdef __VOLUME__ if(ss_indirect->need_update_volume_stack) { @@ -587,17 +569,15 @@ ccl_device void kernel_path_subsurface_setup_indirect( } #endif /* __VOLUME__ */ + *state = ss_indirect->state[ss_indirect->num_rays]; *ray = *indirect_ray; + *L = *indirect_L; + *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; - /* For render passes, sum and reset indirect light pass variables - * for the next samples. 
- */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); + state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM; } -#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */ -#endif +#endif /* __SUBSURFACE__ */ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) { @@ -618,9 +598,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, #ifdef __SUBSURFACE__ SubsurfaceIndirectRays ss_indirect; + ss_indirect.tracing = false; ss_indirect.num_rays = 0; -# ifdef __SUBSURFACE_DELAYED_INDIRECT__ /* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter * ray by storing an updated version of state in the ss_indirect which will * be updated to the new volume stack. @@ -628,7 +608,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ss_orig_ray; for(;;) { -# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */ #endif /* path iteration */ @@ -877,9 +856,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, &throughput, &ss_indirect)) { -# ifdef __SUBSURFACE_DELAYED_INDIRECT__ ss_orig_ray = ray; -# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */ break; } } @@ -893,24 +870,26 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, break; } -#ifdef __SUBSURFACE_DELAYED_INDIRECT__ +#ifdef __SUBSURFACE__ + kernel_path_subsurface_accum_indirect(&ss_indirect, &L); + /* Trace indirect subsurface rays by restarting the loop. this uses less * stack memory than invoking kernel_path_indirect. 
*/ if(ss_indirect.num_rays) { kernel_path_subsurface_setup_indirect(kg, &ss_indirect, - &L, - &state, &ss_orig_ray, + &state, &ray, + &L, &throughput); } else { break; } } -#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */ +#endif /* __SUBSURFACE__ */ float3 L_sum = path_radiance_clamp_and_sum(kg, &L); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index b9869886a8f..017126d05e3 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -87,7 +87,6 @@ CCL_NAMESPACE_BEGIN /* Experimental on GPU */ #ifdef __KERNEL_EXPERIMENTAL__ #define __SUBSURFACE__ -#define __SUBSURFACE_DELAYED_INDIRECT__ #define __CMJ__ #endif @@ -770,11 +769,14 @@ struct SubsurfaceIntersection struct SubsurfaceIndirectRays { bool need_update_volume_stack; - PathState state; + bool tracing; + PathState state[BSSRDF_MAX_HITS]; + PathRadiance direct_L; int num_rays; Ray rays[BSSRDF_MAX_HITS]; float3 throughputs[BSSRDF_MAX_HITS]; + PathRadiance L[BSSRDF_MAX_HITS]; }; /* Constant Kernel Data |