From 31a025f51e635febb85c47c05d185a716687e898 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 30 Jan 2017 12:08:20 +0100 Subject: Cycles: Split shadow functions to avoid some duplicated calculations --- intern/cycles/kernel/kernel_shadow.h | 468 ++++++++++++++++++++--------------- 1 file changed, 267 insertions(+), 201 deletions(-) diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index e69eac6ab83..95b34b5b2d9 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -19,12 +19,13 @@ CCL_NAMESPACE_BEGIN /* Attenuate throughput accordingly to the given intersection event. * Returns true if the throughput is zero and traversal can be aborted. */ -ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg, - ShaderData *shadow_sd, - PathState *state, - Intersection *isect, - Ray *ray, - float3 *throughput) +ccl_device_forceinline bool shadow_handle_transparent_isect( + KernelGlobals *kg, + ShaderData *shadow_sd, + PathState *state, + Intersection *isect, + Ray *ray, + float3 *throughput) { #ifdef __VOLUME__ /* Attenuation between last surface and next surface. */ @@ -60,7 +61,31 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg, return false; } -#ifdef __SHADOW_RECORD_ALL__ +/* Special version which only handles opaque shadows. */ +ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, + ShaderData *shadow_sd, + PathState *state, + Ray *ray, + Intersection *isect, + float3 *shadow) +{ + const bool blocked = scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_OPAQUE, + isect, + NULL, + 0.0f, 0.0f); +#ifdef __VOLUME__ + if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. */ + kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); + } +#endif + return blocked; +} + +#ifdef __TRANSPARENT_SHADOWS__ +# ifdef __SHADOW_RECORD_ALL__ /* Shadow function to compute how much light is blocked, * * We trace a single ray. If it hits any opaque surface, or more than a given @@ -86,121 +111,134 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg, * is something to be kept an eye on. */ -#define SHADOW_STACK_MAX_HITS 64 +# define SHADOW_STACK_MAX_HITS 64 -ccl_device_inline bool shadow_blocked_all(KernelGlobals *kg, - ShaderData *shadow_sd, - PathState *state, - Ray *ray, - float3 *shadow) +/* Actual logic with traversal loop implementation which is free from device + * specific tweaks. + * + * Note that hits array should be as big as max_hits+1. + */ +ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, + ShaderData *shadow_sd, + PathState *state, + Ray *ray, + Intersection *hits, + uint max_hits, + float3 *shadow) { - *shadow = make_float3(1.0f, 1.0f, 1.0f); - if(ray->t == 0.0f) { - return false; - } - bool blocked; - if(kernel_data.integrator.transparent_shadows) { - /* Check transparent bounces here, for volume scatter which can do - * lighting before surface path termination is checked. - */ - if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - return true; - } - /* Intersect to find an opaque surface, or record all transparent - * surface hits. - */ -#ifdef __KERNEL_CUDA__ - Intersection *hits = kg->hits_stack; -#else - Intersection hits_stack[SHADOW_STACK_MAX_HITS]; - Intersection *hits = hits_stack; -#endif - const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; -#ifndef __KERNEL_GPU__ - /* Prefer to use stack but use dynamic allocation if too deep max hits - * we need max_hits + 1 storage space due to the logic in - * scene_intersect_shadow_all which will first store and then check if - * the limit is exceeded. - * - * Ignore this on GPU because of slow/unavailable malloc(). - */ - if(max_hits + 1 > SHADOW_STACK_MAX_HITS) { - if(kg->transparent_shadow_intersections == NULL) { - kg->transparent_shadow_intersections = - (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); + /* Intersect to find an opaque surface, or record all transparent + * surface hits. + */ + uint num_hits; + const bool blocked = scene_intersect_shadow_all(kg, + ray, + hits, + max_hits, + &num_hits); + /* If no opaque surface found but we did find transparent hits, + * shade them. + */ + if(!blocked && num_hits > 0) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D*ray->t; + float last_t = 0.0f; + int bounce = state->transparent_bounce; + Intersection *isect = hits; +# ifdef __VOLUME__ + PathState ps = *state; +# endif + sort_intersections(hits, num_hits); + for(int hit = 0; hit < num_hits; hit++, isect++) { + /* Adjust intersection distance for moving ray forward. */ + float new_t = isect->t; + isect->t -= last_t; + /* Skip hit if we did not move forward, step by step raytracing + * would have skipped it as well then. + */ + if(last_t == new_t) { + continue; } - hits = kg->transparent_shadow_intersections; - } -#endif /* __KERNEL_GPU__ */ - uint num_hits; - blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits); - /* If no opaque surface found but we did find transparent hits, - * shade them. - */ - if(!blocked && num_hits > 0) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - float last_t = 0.0f; - int bounce = state->transparent_bounce; - Intersection *isect = hits; -#ifdef __VOLUME__ - PathState ps = *state; -#endif - sort_intersections(hits, num_hits); - for(int hit = 0; hit < num_hits; hit++, isect++) { - /* Adjust intersection distance for moving ray forward. */ - float new_t = isect->t; - isect->t -= last_t; - /* Skip hit if we did not move forward, step by step raytracing - * would have skipped it as well then. - */ - if(last_t == new_t) { - continue; - } - last_t = new_t; - /* Attenuate the throughput. */ - if(shadow_handle_transparent_isect(kg, - shadow_sd, - &ps, - isect, - ray, - &throughput)) - { - return true; - } - /* Move ray forward. */ - ray->P = shadow_sd->P; - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - bounce++; + last_t = new_t; + /* Attenuate the throughput. */ + if(shadow_handle_transparent_isect(kg, + shadow_sd, + &ps, + isect, + ray, + &throughput)) + { + return true; } -#ifdef __VOLUME__ - /* Attenuation for last line segment towards light. */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + /* Move ray forward. */ + ray->P = shadow_sd->P; + if(ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); } -#endif - *shadow = throughput; - return is_zero(throughput); + bounce++; } +# ifdef __VOLUME__ + /* Attenuation for last line segment towards light. */ + if(ps.volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + } +# endif + *shadow = throughput; + return is_zero(throughput); } - else { - Intersection isect; - blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); - } -#ifdef __VOLUME__ +# ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader/ */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } -#endif +# endif return blocked; } -#endif /* __SHADOW_RECORD_ALL__ */ -#ifndef __KERNEL_CPU__ +/* Here we do all device specific trickery before invoking actual traversal + * loop to help readability of the actual logic. + */ +ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, + ShaderData *shadow_sd, + PathState *state, + Ray *ray, + uint max_hits, + float3 *shadow) +{ +# ifdef __KERNEL_CUDA__ + Intersection *hits = kg->hits_stack; +# else + Intersection hits_stack[SHADOW_STACK_MAX_HITS]; + Intersection *hits = hits_stack; +# endif +# ifndef __KERNEL_GPU__ + /* Prefer to use stack but use dynamic allocation if too deep max hits + * we need max_hits + 1 storage space due to the logic in + * scene_intersect_shadow_all which will first store and then check if + * the limit is exceeded. + * + * Ignore this on GPU because of slow/unavailable malloc(). + */ + if(max_hits + 1 > SHADOW_STACK_MAX_HITS) { + if(kg->transparent_shadow_intersections == NULL) { + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + kg->transparent_shadow_intersections = + (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); + } + hits = kg->transparent_shadow_intersections; + } +# endif /* __KERNEL_GPU__ */ + /* Invoke actual traversal. */ + return shadow_blocked_transparent_all_loop(kg, + shadow_sd, + state, + ray, + hits, + max_hits, + shadow); +} +# endif /* __SHADOW_RECORD_ALL__ */ + +# ifdef __KERNEL_GPU__ /* Shadow function to compute how much light is blocked, * * Here we raytrace from one transparent surface to the next step by step. @@ -209,119 +247,147 @@ ccl_device_inline bool shadow_blocked_all(KernelGlobals *kg, * potentially transparent, and only in that case start marching. this gives * one extra ray cast for the cases were we do want transparency. */ -ccl_device_noinline bool shadow_blocked_stepped(KernelGlobals *kg, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray_input, - float3 *shadow) +ccl_device bool shadow_blocked_transparent_stepped( + KernelGlobals *kg, + ShaderData *shadow_sd, + PathState *state, + Ray *ray, + Intersection *isect, + float3 *shadow) { - *shadow = make_float3(1.0f, 1.0f, 1.0f); - if(ray_input->t == 0.0f) { - return false; - } -#ifdef __SPLIT_KERNEL__ - Ray private_ray = *ray_input; - Ray *ray = &private_ray; -#else - Ray *ray = ray_input; -#endif - -#ifdef __SPLIT_KERNEL__ - Intersection *isect = &kg->isect_shadow[SD_THREAD]; -#else - Intersection isect_object; - Intersection *isect = &isect_object; -#endif /* Early check for opaque shadows. */ - bool blocked = scene_intersect(kg, - *ray, - PATH_RAY_SHADOW_OPAQUE, - isect, - NULL, - 0.0f, 0.0f); -#ifdef __TRANSPARENT_SHADOWS__ - if(blocked && kernel_data.integrator.transparent_shadows) { - if(shader_transparent_shadow(kg, isect)) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - int bounce = state->transparent_bounce; -# ifdef __VOLUME__ - PathState ps = *state; -# endif - for(;;) { - if(bounce >= kernel_data.integrator.transparent_max_bounce) { - return true; - } - if(!scene_intersect(kg, - *ray, - PATH_RAY_SHADOW_TRANSPARENT, - isect, - NULL, - 0.0f, 0.0f)) - { - break; - } - if(!shader_transparent_shadow(kg, isect)) { - return true; - } - /* Attenuate the throughput. */ - if(shadow_handle_transparent_isect(kg, - shadow_sd, - &ps, - isect, - ray, - &throughput)) - { - return true; - } - /* Move ray forward. */ - ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - bounce++; + const bool blocked = scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_OPAQUE, + isect, + NULL, + 0.0f, 0.0f); + if(blocked && shader_transparent_shadow(kg, isect)) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D*ray->t; + int bounce = state->transparent_bounce; +# ifdef __VOLUME__ + PathState ps = *state; +# endif + for(;;) { + if(bounce >= kernel_data.integrator.transparent_max_bounce) { + return true; + } + if(!scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_TRANSPARENT, + isect, + NULL, + 0.0f, 0.0f)) + { + break; + } + if(!shader_transparent_shadow(kg, isect)) { + return true; } -# ifdef __VOLUME__ - /* Attenuation for last line segment towards light. */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + /* Attenuate the throughput. */ + if(shadow_handle_transparent_isect(kg, + shadow_sd, + &ps, + isect, + ray, + &throughput)) + { + return true; } -# endif - *shadow *= throughput; - return is_zero(throughput); + /* Move ray forward. */ + ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); + if(ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; + } +# ifdef __VOLUME__ + /* Attenuation for last line segment towards light. */ + if(ps.volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); } +# endif + *shadow *= throughput; + return is_zero(throughput); } -# ifdef __VOLUME__ - else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { +# ifdef __VOLUME__ + if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } -# endif -#endif +# endif return blocked; } -#endif /* __KERNEL_CPU__ */ +# endif /* __KERNEL_GPU__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, - Ray *ray, + Ray *ray_input, float3 *shadow) { -#ifdef __SHADOW_RECORD_ALL__ -# ifdef __KERNEL_CPU__ - return shadow_blocked_all(kg, shadow_sd, state, ray, shadow); -# else + /* Special trickery for split kernel: some data is coming from the + * global memory. + */ +#ifdef __SPLIT_KERNEL__ + Ray private_ray = *ray_input; + Ray *ray = &private_ray; + Intersection *isect = &kg->isect_shadow[SD_THREAD]; +#else /* __SPLIT_KERNEL__ */ + Ray *ray = ray_input; + Intersection isect_object; + Intersection *isect = &isect_object; +#endif /* __SPLIT_KERNEL__ */ + /* Some common early checks. */ + *shadow = make_float3(1.0f, 1.0f, 1.0f); + if(ray->t == 0.0f) { + return false; + } + /* Do actual shadow shading. */ +#ifdef __TRANSPARENT_SHADOWS__ + if(!kernel_data.integrator.transparent_shadows) +#endif + { + return shadow_blocked_opaque(kg, + shadow_sd, + state, + ray, + isect, + shadow); + } +#ifdef __TRANSPARENT_SHADOWS__ +# ifdef __SHADOW_RECORD_ALL__ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + /* Check transparent bounces here, for volume scatter which can do + * lighting before surface path termination is checked. + */ + if(state->transparent_bounce >= transparent_max_bounce) { + return true; + } const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; - if(max_hits + 1 < SHADOW_STACK_MAX_HITS) { - return shadow_blocked_all(kg, shadow_sd, state, ray, shadow); +# ifdef __KERNEL_GPU__ + if(max_hits + 1 < SHADOW_STACK_MAX_HITS) +# endif + { + return shadow_blocked_transparent_all(kg, + shadow_sd, + state, + ray, + max_hits, + shadow); } -# endif -#endif -#ifndef __KERNEL_CPU__ - return shadow_blocked_stepped(kg, shadow_sd, state, ray, shadow); -#endif +# endif /* __SHADOW_RECORD_ALL__ */ +# ifdef __KERNEL_GPU__ + return shadow_blocked_transparent_stepped(kg, + shadow_sd, + state, + ray, + isect, + shadow); +# endif /* __KERNEL_GPU__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ } #undef SHADOW_STACK_MAX_HITS -- cgit v1.2.3