/* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ CCL_NAMESPACE_BEGIN /* Attenuate throughput accordingly to the given intersection event. * Returns true if the throughput is zero and traversal can be aborted. */ ccl_device_forceinline bool shadow_handle_transparent_isect( KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, # ifdef __VOLUME__ ccl_addr_space struct PathState *volume_state, # endif Intersection *isect, Ray *ray, float3 *throughput) { #ifdef __VOLUME__ /* Attenuation between last surface and next surface. */ if(volume_state->volume_stack[0].shader != SHADER_NONE) { Ray segment_ray = *ray; segment_ray.t = isect->t; kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput); } #endif /* Setup shader data at surface. */ shader_setup_from_ray(kg, shadow_sd, isect, ray); /* Attenuation from transparent surface. */ if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { path_state_modify_bounce(state, true); shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); path_state_modify_bounce(state, false); *throughput *= shader_bsdf_transparency(kg, shadow_sd); } /* Stop if all light is blocked. */ if(is_zero(*throughput)) { return true; } #ifdef __VOLUME__ /* Exit/enter volume. */ kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack); #endif return false; } /* Special version which only handles opaque shadows. */ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, Ray *ray, Intersection *isect, float3 *shadow) { const bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); #ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } #endif return blocked; } #ifdef __TRANSPARENT_SHADOWS__ # ifdef __SHADOW_RECORD_ALL__ /* Shadow function to compute how much light is blocked, * * We trace a single ray. If it hits any opaque surface, or more than a given * number of transparent surfaces is hit, then we consider the geometry to be * entirely blocked. If not, all transparent surfaces will be recorded and we * will shade them one by one to determine how much light is blocked. This all * happens in one scene intersection function. * * Recording all hits works well in some cases but may be slower in others. If * we have many semi-transparent hairs, one intersection may be faster because * you'd be reinteresecting the same hairs a lot with each step otherwise. If * however there is mostly binary transparency then we may be recording many * unnecessary intersections when one of the first surfaces blocks all light. * * From tests in real scenes it seems the performance loss is either minimal, * or there is a performance increase anyway due to avoiding the need to send * two rays with transparent shadows. * * On CPU it'll handle all transparent bounces (by allocating storage for * intersections when they don't fit into the stack storage). * * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this * is something to be kept an eye on. */ # define SHADOW_STACK_MAX_HITS 64 /* Actual logic with traversal loop implementation which is free from device * specific tweaks. * * Note that hits array should be as big as max_hits+1. */ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, const int skip_object, Ray *ray, Intersection *hits, uint max_hits, float3 *shadow) { /* Intersect to find an opaque surface, or record all transparent * surface hits. */ uint num_hits; const bool blocked = scene_intersect_shadow_all(kg, ray, hits, skip_object, max_hits, &num_hits); /* If no opaque surface found but we did find transparent hits, * shade them. */ if(!blocked && num_hits > 0) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 Pend = ray->P + ray->D*ray->t; float last_t = 0.0f; int bounce = state->transparent_bounce; Intersection *isect = hits; # ifdef __VOLUME__ # ifdef __SPLIT_KERNEL__ ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; # else PathState ps_object; PathState *ps = &ps_object; # endif *ps = *state; # endif sort_intersections(hits, num_hits); for(int hit = 0; hit < num_hits; hit++, isect++) { /* Adjust intersection distance for moving ray forward. */ float new_t = isect->t; isect->t -= last_t; /* Skip hit if we did not move forward, step by step raytracing * would have skipped it as well then. */ if(last_t == new_t) { continue; } last_t = new_t; /* Attenuate the throughput. */ if(shadow_handle_transparent_isect(kg, shadow_sd, state, #ifdef __VOLUME__ ps, #endif isect, ray, &throughput)) { return true; } /* Move ray forward. */ ray->P = shadow_sd->P; if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } bounce++; } # ifdef __VOLUME__ /* Attenuation for last line segment towards light. */ if(ps->volume_stack[0].shader != SHADER_NONE) { kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } # endif *shadow = throughput; return is_zero(throughput); } # ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader/ */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } # endif return blocked; } /* Here we do all device specific trickery before invoking actual traversal * loop to help readability of the actual logic. */ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, const int skip_object, Ray *ray, uint max_hits, float3 *shadow) { # ifdef __SPLIT_KERNEL__ Intersection hits_[SHADOW_STACK_MAX_HITS]; Intersection *hits = &hits_[0]; # elif defined(__KERNEL_CUDA__) Intersection *hits = kg->hits_stack; # else Intersection hits_stack[SHADOW_STACK_MAX_HITS]; Intersection *hits = hits_stack; # endif # ifndef __KERNEL_GPU__ /* Prefer to use stack but use dynamic allocation if too deep max hits * we need max_hits + 1 storage space due to the logic in * scene_intersect_shadow_all which will first store and then check if * the limit is exceeded. * * Ignore this on GPU because of slow/unavailable malloc(). */ if(max_hits + 1 > SHADOW_STACK_MAX_HITS) { if(kg->transparent_shadow_intersections == NULL) { const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; kg->transparent_shadow_intersections = (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); } hits = kg->transparent_shadow_intersections; } # endif /* __KERNEL_GPU__ */ /* Invoke actual traversal. */ return shadow_blocked_transparent_all_loop(kg, shadow_sd, state, skip_object, ray, hits, max_hits, shadow); } # endif /* __SHADOW_RECORD_ALL__ */ # if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__) /* Shadow function to compute how much light is blocked, * * Here we raytrace from one transparent surface to the next step by step. * To minimize overhead in cases where we don't need transparent shadows, we * first trace a regular shadow ray. We check if the hit primitive was * potentially transparent, and only in that case start marching. this gives * one extra ray cast for the cases were we do want transparency. */ /* This function is only implementing device-independent traversal logic * which requires some precalculation done. */ ccl_device bool shadow_blocked_transparent_stepped_loop( KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, const int skip_object, Ray *ray, Intersection *isect, const bool blocked, const bool is_transparent_isect, float3 *shadow) { if((blocked && is_transparent_isect) || skip_object != OBJECT_NONE) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 Pend = ray->P + ray->D*ray->t; int bounce = state->transparent_bounce; # ifdef __VOLUME__ # ifdef __SPLIT_KERNEL__ ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; # else PathState ps_object; PathState *ps = &ps_object; # endif *ps = *state; # endif for(;;) { if(bounce >= kernel_data.integrator.transparent_max_bounce) { return true; } if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) { break; } #ifdef __SHADOW_TRICKS__ if(skip_object != OBJECT_NONE) { const int isect_object = (isect->object == PRIM_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : isect->object; if(isect_object == skip_object) { shader_setup_from_ray(kg, shadow_sd, isect, ray); /* Move ray forward. */ ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } bounce++; continue; } } #endif if(!shader_transparent_shadow(kg, isect)) { return true; } /* Attenuate the throughput. */ if(shadow_handle_transparent_isect(kg, shadow_sd, state, #ifdef __VOLUME__ ps, #endif isect, ray, &throughput)) { return true; } /* Move ray forward. */ ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } bounce++; } # ifdef __VOLUME__ /* Attenuation for last line segment towards light. */ if(ps->volume_stack[0].shader != SHADER_NONE) { kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } # endif *shadow *= throughput; return is_zero(throughput); } # ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); } # endif return blocked; } ccl_device bool shadow_blocked_transparent_stepped( KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, const int skip_object, Ray *ray, Intersection *isect, float3 *shadow) { bool blocked, is_transparent_isect; if (skip_object == OBJECT_NONE) { blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false; } else { blocked = false; is_transparent_isect = false; } return shadow_blocked_transparent_stepped_loop(kg, shadow_sd, state, skip_object, ray, isect, blocked, is_transparent_isect, shadow); } # endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, Ray *ray_input, float3 *shadow) { Ray *ray = ray_input; Intersection isect; /* Some common early checks. */ *shadow = make_float3(1.0f, 1.0f, 1.0f); if(ray->t == 0.0f) { return false; } #ifdef __SHADOW_TRICKS__ const int skip_object = state->catcher_object; #else const int skip_object = OBJECT_NONE; #endif /* Do actual shadow shading. */ /* First of all, we check if integrator requires transparent shadows. * if not, we use simplest and fastest ever way to calculate occlusion. * * NOTE: We can't do quick opaque test here if we are on shadow-catcher * path because we don't want catcher object to be casting shadow here. */ #ifdef __TRANSPARENT_SHADOWS__ if(!kernel_data.integrator.transparent_shadows && skip_object == OBJECT_NONE) #endif { return shadow_blocked_opaque(kg, shadow_sd, state, ray, &isect, shadow); } #ifdef __TRANSPARENT_SHADOWS__ # ifdef __SHADOW_RECORD_ALL__ /* For the transparent shadows we try to use record-all logic on the * devices which supports this. */ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; /* Check transparent bounces here, for volume scatter which can do * lighting before surface path termination is checked. */ if(state->transparent_bounce >= transparent_max_bounce) { return true; } const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; # ifdef __KERNEL_GPU__ /* On GPU we do trickey with tracing opaque ray first, this avoids speed * regressions in some files. * * TODO(sergey): Check why using record-all behavior causes slowdown in such * cases. Could that be caused by a higher spill pressure? */ const bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false; if(!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { return shadow_blocked_transparent_stepped_loop(kg, shadow_sd, state, skip_object, ray, &isect, blocked, is_transparent_isect, shadow); } # endif /* __KERNEL_GPU__ */ return shadow_blocked_transparent_all(kg, shadow_sd, state, skip_object, ray, max_hits, shadow); # else /* __SHADOW_RECORD_ALL__ */ /* Fallback to a slowest version which works on all devices. */ return shadow_blocked_transparent_stepped(kg, shadow_sd, state, skip_object, ray, &isect, shadow); # endif /* __SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ } #undef SHADOW_STACK_MAX_HITS CCL_NAMESPACE_END