diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_shadow.h')
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 661 |
1 files changed, 439 insertions, 222 deletions
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 2981f6ac566..0426e0a62c9 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -16,9 +16,84 @@ CCL_NAMESPACE_BEGIN -#ifdef __SHADOW_RECORD_ALL__ +/* Attenuate throughput accordingly to the given intersection event. + * Returns true if the throughput is zero and traversal can be aborted. + */ +ccl_device_forceinline bool shadow_handle_transparent_isect( + KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, +# ifdef __VOLUME__ + ccl_addr_space struct PathState *volume_state, +# endif + Intersection *isect, + Ray *ray, + float3 *throughput) +{ +#ifdef __VOLUME__ + /* Attenuation between last surface and next surface. */ + if(volume_state->volume_stack[0].shader != SHADER_NONE) { + Ray segment_ray = *ray; + segment_ray.t = isect->t; + kernel_volume_shadow(kg, + shadow_sd, + volume_state, + &segment_ray, + throughput); + } +#endif + /* Setup shader data at surface. */ + shader_setup_from_ray(kg, shadow_sd, isect, ray); + /* Attenuation from transparent surface. */ + if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { + path_state_modify_bounce(state, true); + shader_eval_surface(kg, + shadow_sd, + NULL, + state, + 0.0f, + PATH_RAY_SHADOW, + SHADER_CONTEXT_SHADOW); + path_state_modify_bounce(state, false); + *throughput *= shader_bsdf_transparency(kg, shadow_sd); + } + /* Stop if all light is blocked. */ + if(is_zero(*throughput)) { + return true; + } +#ifdef __VOLUME__ + /* Exit/enter volume. */ + kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack); +#endif + return false; +} + +/* Special version which only handles opaque shadows. */ +ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + Ray *ray, + Intersection *isect, + float3 *shadow) +{ + const bool blocked = scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_OPAQUE, + isect, + NULL, + 0.0f, 0.0f); +#ifdef __VOLUME__ + if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. */ + kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); + } +#endif + return blocked; +} -/* Shadow function to compute how much light is blocked, CPU variation. +#ifdef __TRANSPARENT_SHADOWS__ +# ifdef __SHADOW_RECORD_ALL__ +/* Shadow function to compute how much light is blocked, * * We trace a single ray. If it hits any opaque surface, or more than a given * number of transparent surfaces is hit, then we consider the geometry to be @@ -36,261 +111,403 @@ CCL_NAMESPACE_BEGIN * or there is a performance increase anyway due to avoiding the need to send * two rays with transparent shadows. * - * This is CPU only because of qsort, and malloc or high stack space usage to - * record all these intersections. */ + * On CPU it'll handle all transparent bounces (by allocating storage for + * intersections when they don't fit into the stack storage). + * + * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this + * is something to be kept an eye on. + */ -#define STACK_MAX_HITS 64 +# define SHADOW_STACK_MAX_HITS 64 -ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow) +/* Actual logic with traversal loop implementation which is free from device + * specific tweaks. + * + * Note that hits array should be as big as max_hits+1. + */ +ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const int skip_object, + Ray *ray, + Intersection *hits, + uint max_hits, + float3 *shadow) { - *shadow = make_float3(1.0f, 1.0f, 1.0f); - - if(ray->t == 0.0f) - return false; - - bool blocked; - - if(kernel_data.integrator.transparent_shadows) { - /* check transparent bounces here, for volume scatter which can do - * lighting before surface path termination is checked */ - if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) - return true; - - /* intersect to find an opaque surface, or record all transparent surface hits */ - Intersection hits_stack[STACK_MAX_HITS]; - Intersection *hits = hits_stack; - const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; - - /* prefer to use stack but use dynamic allocation if too deep max hits - * we need max_hits + 1 storage space due to the logic in - * scene_intersect_shadow_all which will first store and then check if - * the limit is exceeded */ - if(max_hits + 1 > STACK_MAX_HITS) { - if(kg->transparent_shadow_intersections == NULL) { - kg->transparent_shadow_intersections = - (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); + /* Intersect to find an opaque surface, or record all transparent + * surface hits. + */ + uint num_hits; + const bool blocked = scene_intersect_shadow_all(kg, + ray, + hits, + skip_object, + max_hits, + &num_hits); + /* If no opaque surface found but we did find transparent hits, + * shade them. + */ + if(!blocked && num_hits > 0) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D*ray->t; + float last_t = 0.0f; + int bounce = state->transparent_bounce; + Intersection *isect = hits; +# ifdef __VOLUME__ +# ifdef __SPLIT_KERNEL__ + ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; +# else + PathState ps_object; + PathState *ps = &ps_object; +# endif + *ps = *state; +# endif + sort_intersections(hits, num_hits); + for(int hit = 0; hit < num_hits; hit++, isect++) { + /* Adjust intersection distance for moving ray forward. */ + float new_t = isect->t; + isect->t -= last_t; + /* Skip hit if we did not move forward, step by step raytracing + * would have skipped it as well then. + */ + if(last_t == new_t) { + continue; } - hits = kg->transparent_shadow_intersections; - } - - uint num_hits; - blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits); - - /* if no opaque surface found but we did find transparent hits, shade them */ - if(!blocked && num_hits > 0) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - float last_t = 0.0f; - int bounce = state->transparent_bounce; - Intersection *isect = hits; + last_t = new_t; + /* Attenuate the throughput. */ + if(shadow_handle_transparent_isect(kg, + shadow_sd, + state, #ifdef __VOLUME__ - PathState ps = *state; + ps, #endif - - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); - - for(int hit = 0; hit < num_hits; hit++, isect++) { - /* adjust intersection distance for moving ray forward */ - float new_t = isect->t; - isect->t -= last_t; - - /* skip hit if we did not move forward, step by step raytracing - * would have skipped it as well then */ - if(last_t == new_t) - continue; - - last_t = new_t; - -#ifdef __VOLUME__ - /* attenuation between last surface and next surface */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - Ray segment_ray = *ray; - segment_ray.t = isect->t; - kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput); - } -#endif - - /* setup shader data at surface */ - shader_setup_from_ray(kg, shadow_sd, isect, ray); - - /* attenuation from transparent surface */ - if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - path_state_modify_bounce(state, true); - shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); - path_state_modify_bounce(state, false); - - throughput *= shader_bsdf_transparency(kg, shadow_sd); - } - - /* stop if all light is blocked */ - if(is_zero(throughput)) { - return true; - } - - /* move ray forward */ - ray->P = shadow_sd->P; - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - -#ifdef __VOLUME__ - /* exit/enter volume */ - kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack); -#endif - - bounce++; + isect, + ray, + &throughput)) + { + return true; } - -#ifdef __VOLUME__ - /* attenuation for last line segment towards light */ - if(ps.volume_stack[0].shader != SHADER_NONE) - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); -#endif - - *shadow = throughput; - - return is_zero(throughput); + /* Move ray forward. */ + ray->P = shadow_sd->P; + if(ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; } +# ifdef __VOLUME__ + /* Attenuation for last line segment towards light. */ + if(ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); + } +# endif + *shadow = throughput; + return is_zero(throughput); } - else { - Intersection isect; - blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); - } - -#ifdef __VOLUME__ +# ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* apply attenuation from current volume shader */ + /* Apply attenuation from current volume shader/ */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } -#endif - +# endif return blocked; } -#undef STACK_MAX_HITS - -#else +/* Here we do all device specific trickery before invoking actual traversal + * loop to help readability of the actual logic. + */ +ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const int skip_object, + Ray *ray, + uint max_hits, + float3 *shadow) +{ +# ifdef __SPLIT_KERNEL__ + Intersection hits_[SHADOW_STACK_MAX_HITS]; + Intersection *hits = &hits_[0]; +# elif defined(__KERNEL_CUDA__) + Intersection *hits = kg->hits_stack; +# else + Intersection hits_stack[SHADOW_STACK_MAX_HITS]; + Intersection *hits = hits_stack; +# endif +# ifndef __KERNEL_GPU__ + /* Prefer to use stack but use dynamic allocation if too deep max hits + * we need max_hits + 1 storage space due to the logic in + * scene_intersect_shadow_all which will first store and then check if + * the limit is exceeded. + * + * Ignore this on GPU because of slow/unavailable malloc(). + */ + if(max_hits + 1 > SHADOW_STACK_MAX_HITS) { + if(kg->transparent_shadow_intersections == NULL) { + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + kg->transparent_shadow_intersections = + (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); + } + hits = kg->transparent_shadow_intersections; + } +# endif /* __KERNEL_GPU__ */ + /* Invoke actual traversal. */ + return shadow_blocked_transparent_all_loop(kg, + shadow_sd, + state, + skip_object, + ray, + hits, + max_hits, + shadow); +} +# endif /* __SHADOW_RECORD_ALL__ */ -/* Shadow function to compute how much light is blocked, GPU variation. +# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__) +/* Shadow function to compute how much light is blocked, * * Here we raytrace from one transparent surface to the next step by step. * To minimize overhead in cases where we don't need transparent shadows, we * first trace a regular shadow ray. We check if the hit primitive was * potentially transparent, and only in that case start marching. this gives - * one extra ray cast for the cases were we do want transparency. */ + * one extra ray cast for the cases were we do want transparency. + */ -ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray_input, - float3 *shadow) +/* This function is only implementing device-independent traversal logic + * which requires some precalculation done. + */ +ccl_device bool shadow_blocked_transparent_stepped_loop( + KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const int skip_object, + Ray *ray, + Intersection *isect, + const bool blocked, + const bool is_transparent_isect, + float3 *shadow) { - *shadow = make_float3(1.0f, 1.0f, 1.0f); - - if(ray_input->t == 0.0f) - return false; - -#ifdef __SPLIT_KERNEL__ - Ray private_ray = *ray_input; - Ray *ray = &private_ray; -#else - Ray *ray = ray_input; -#endif - -#ifdef __SPLIT_KERNEL__ - Intersection *isect = &kg->isect_shadow[SD_THREAD]; -#else - Intersection isect_object; - Intersection *isect = &isect_object; -#endif - - bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); - -#ifdef __TRANSPARENT_SHADOWS__ - if(blocked && kernel_data.integrator.transparent_shadows) { - if(shader_transparent_shadow(kg, isect)) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - int bounce = state->transparent_bounce; -#ifdef __VOLUME__ - PathState ps = *state; -#endif - - for(;;) { - if(bounce >= kernel_data.integrator.transparent_max_bounce) - return true; - - if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) - { -#ifdef __VOLUME__ - /* attenuation for last line segment towards light */ - if(ps.volume_stack[0].shader != SHADER_NONE) - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); -#endif - - *shadow *= throughput; - - return false; - } - - if(!shader_transparent_shadow(kg, isect)) { - return true; - } - -#ifdef __VOLUME__ - /* attenuation between last surface and next surface */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - Ray segment_ray = *ray; - segment_ray.t = isect->t; - kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput); + if((blocked && is_transparent_isect) || skip_object != OBJECT_NONE) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D*ray->t; + int bounce = state->transparent_bounce; +# ifdef __VOLUME__ +# ifdef __SPLIT_KERNEL__ + ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; +# else + PathState ps_object; + PathState *ps = &ps_object; +# endif + *ps = *state; +# endif + for(;;) { + if(bounce >= kernel_data.integrator.transparent_max_bounce) { + return true; + } + if(!scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_TRANSPARENT, + isect, + NULL, + 0.0f, 0.0f)) + { + break; + } +#ifdef __SHADOW_TRICKS__ + if(skip_object != OBJECT_NONE) { + const int isect_object = (isect->object == PRIM_NONE) + ? kernel_tex_fetch(__prim_object, isect->prim) + : isect->object; + if(isect_object == skip_object) { + shader_setup_from_ray(kg, shadow_sd, isect, ray); + /* Move ray forward. */ + ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); + if(ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; + continue; } + } #endif - - /* setup shader data at surface */ - shader_setup_from_ray(kg, shadow_sd, isect, ray); - - /* attenuation from transparent surface */ - if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) { - path_state_modify_bounce(state, true); - shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); - path_state_modify_bounce(state, false); - - throughput *= shader_bsdf_transparency(kg, shadow_sd); - } - - /* stop if all light is blocked */ - if(is_zero(throughput)) { - return true; - } - - /* move ray forward */ - ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - + if(!shader_transparent_shadow(kg, isect)) { + return true; + } + /* Attenuate the throughput. */ + if(shadow_handle_transparent_isect(kg, + shadow_sd, + state, #ifdef __VOLUME__ - /* exit/enter volume */ - kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack); + ps, #endif - - bounce++; + isect, + ray, + &throughput)) + { + return true; } + /* Move ray forward. */ + ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); + if(ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; + } +# ifdef __VOLUME__ + /* Attenuation for last line segment towards light. */ + if(ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } +# endif + *shadow *= throughput; + return is_zero(throughput); } -#ifdef __VOLUME__ - else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* apply attenuation from current volume shader */ +# ifdef __VOLUME__ + if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } -#endif -#endif - +# endif return blocked; } +ccl_device bool shadow_blocked_transparent_stepped( + KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const int skip_object, + Ray *ray, + Intersection *isect, + float3 *shadow) +{ + bool blocked, is_transparent_isect; + if (skip_object == OBJECT_NONE) { + blocked = scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_OPAQUE, + isect, + NULL, + 0.0f, 0.0f); + is_transparent_isect = blocked + ? shader_transparent_shadow(kg, isect) + : false; + } + else { + blocked = false; + is_transparent_isect = false; + } + return shadow_blocked_transparent_stepped_loop(kg, + shadow_sd, + state, + skip_object, + ray, + isect, + blocked, + is_transparent_isect, + shadow); +} + +# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ + +ccl_device_inline bool shadow_blocked(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + Ray *ray_input, + float3 *shadow) +{ + Ray *ray = ray_input; + Intersection isect; + /* Some common early checks. */ + *shadow = make_float3(1.0f, 1.0f, 1.0f); + if(ray->t == 0.0f) { + return false; + } +#ifdef __SHADOW_TRICKS__ + const int skip_object = state->catcher_object; +#else + const int skip_object = OBJECT_NONE; #endif + /* Do actual shadow shading. */ + /* First of all, we check if integrator requires transparent shadows. + * if not, we use simplest and fastest ever way to calculate occlusion. + * + * NOTE: We can't do quick opaque test here if we are on shadow-catcher + * path because we don't want catcher object to be casting shadow here. + */ +#ifdef __TRANSPARENT_SHADOWS__ + if(!kernel_data.integrator.transparent_shadows && + skip_object == OBJECT_NONE) +#endif + { + return shadow_blocked_opaque(kg, + shadow_sd, + state, + ray, + &isect, + shadow); + } +#ifdef __TRANSPARENT_SHADOWS__ +# ifdef __SHADOW_RECORD_ALL__ + /* For the transparent shadows we try to use record-all logic on the + * devices which supports this. + */ + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + /* Check transparent bounces here, for volume scatter which can do + * lighting before surface path termination is checked. + */ + if(state->transparent_bounce >= transparent_max_bounce) { + return true; + } + const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; +# ifdef __KERNEL_GPU__ + /* On GPU we do trickey with tracing opaque ray first, this avoids speed + * regressions in some files. + * + * TODO(sergey): Check why using record-all behavior causes slowdown in such + * cases. Could that be caused by a higher spill pressure? + */ + const bool blocked = scene_intersect(kg, + *ray, + PATH_RAY_SHADOW_OPAQUE, + &isect, + NULL, + 0.0f, 0.0f); + const bool is_transparent_isect = blocked + ? shader_transparent_shadow(kg, &isect) + : false; + if(!blocked || !is_transparent_isect || + max_hits + 1 >= SHADOW_STACK_MAX_HITS) + { + return shadow_blocked_transparent_stepped_loop(kg, + shadow_sd, + state, + skip_object, + ray, + &isect, + blocked, + is_transparent_isect, + shadow); + } +# endif /* __KERNEL_GPU__ */ + return shadow_blocked_transparent_all(kg, + shadow_sd, + state, + skip_object, + ray, + max_hits, + shadow); +# else /* __SHADOW_RECORD_ALL__ */ + /* Fallback to a slowest version which works on all devices. */ + return shadow_blocked_transparent_stepped(kg, + shadow_sd, + state, + skip_object, + ray, + &isect, + shadow); +# endif /* __SHADOW_RECORD_ALL__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ +} -CCL_NAMESPACE_END +#undef SHADOW_STACK_MAX_HITS +CCL_NAMESPACE_END |