diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_shadow.h')
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 193 |
1 files changed, 142 insertions, 51 deletions
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 06a77a208cb..8a0da6c3b13 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -16,6 +16,42 @@ CCL_NAMESPACE_BEGIN +#ifdef __VOLUME__ +typedef struct VolumeState { +# ifdef __SPLIT_KERNEL__ +# else + PathState ps; +# endif +} VolumeState; + +/* Get PathState ready for use for volume stack evaluation. */ +# ifdef __SPLIT_KERNEL__ +ccl_addr_space +# endif +ccl_device_inline PathState *shadow_blocked_volume_path_state( + KernelGlobals *kg, + VolumeState *volume_state, + ccl_addr_space PathState *state, + ShaderData *sd, + Ray *ray) +{ +# ifdef __SPLIT_KERNEL__ + ccl_addr_space PathState *ps = + &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; +# else + PathState *ps = &volume_state->ps; +# endif + *ps = *state; + /* We are checking for shadow on the "other" side of the surface, so need + * to discard volume we are currently at. + */ + if(dot(sd->Ng, ray->D) < 0.0f) { + kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack); + } + return ps; +} +#endif /* __VOLUME__ */ + /* Attenuate throughput accordingly to the given intersection event. * Returns true if the throughput is zero and traversal can be aborted. */ @@ -24,7 +60,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( ShaderData *shadow_sd, ccl_addr_space PathState *state, # ifdef __VOLUME__ - struct PathState *volume_state, + ccl_addr_space struct PathState *volume_state, # endif Intersection *isect, Ray *ray, @@ -45,15 +81,12 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( /* Setup shader data at surface. */ shader_setup_from_ray(kg, shadow_sd, isect, ray); /* Attenuation from transparent surface. */ - if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) { + if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { path_state_modify_bounce(state, true); shader_eval_surface(kg, shadow_sd, - NULL, state, - 0.0f, - PATH_RAY_SHADOW, - SHADER_CONTEXT_SHADOW); + PATH_RAY_SHADOW); path_state_modify_bounce(state, false); *throughput *= shader_bsdf_transparency(kg, shadow_sd); } @@ -72,13 +105,14 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, + const uint visibility, Ray *ray, Intersection *isect, float3 *shadow) { const bool blocked = scene_intersect(kg, *ray, - PATH_RAY_SHADOW_OPAQUE, + visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); @@ -126,8 +160,10 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, * Note that hits array should be as big as max_hits+1. */ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, + ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, + const uint visibility, Ray *ray, Intersection *hits, uint max_hits, @@ -140,8 +176,12 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, const bool blocked = scene_intersect_shadow_all(kg, ray, hits, + visibility, max_hits, &num_hits); +# ifdef __VOLUME__ + VolumeState volume_state; +# endif /* If no opaque surface found but we did find transparent hits, * shade them. */ @@ -152,7 +192,14 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, int bounce = state->transparent_bounce; Intersection *isect = hits; # ifdef __VOLUME__ - PathState ps = *state; +# ifdef __SPLIT_KERNEL__ + ccl_addr_space +# endif + PathState *ps = shadow_blocked_volume_path_state(kg, + &volume_state, + state, + sd, + ray); # endif sort_intersections(hits, num_hits); for(int hit = 0; hit < num_hits; hit++, isect++) { @@ -171,7 +218,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, shadow_sd, state, #ifdef __VOLUME__ - &ps, + ps, #endif isect, ray, @@ -180,7 +227,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, return true; } /* Move ray forward. */ - ray->P = ccl_fetch(shadow_sd, P); + ray->P = shadow_sd->P; if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } @@ -188,8 +235,8 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, } # ifdef __VOLUME__ /* Attenuation for last line segment towards light. */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + if(ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } # endif *shadow = throughput; @@ -197,8 +244,16 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, } # ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* Apply attenuation from current volume shader/ */ - kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); + /* Apply attenuation from current volume shader. */ +# ifdef __SPLIT_KERNEL__ + ccl_addr_space +# endif + PathState *ps = shadow_blocked_volume_path_state(kg, + &volume_state, + state, + sd, + ray); + kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); } # endif return blocked; @@ -208,13 +263,18 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, * loop to help readability of the actual logic. */ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, + ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, + const uint visibility, Ray *ray, uint max_hits, float3 *shadow) { -# ifdef __KERNEL_CUDA__ +# ifdef __SPLIT_KERNEL__ + Intersection hits_[SHADOW_STACK_MAX_HITS]; + Intersection *hits = &hits_[0]; +# elif defined(__KERNEL_CUDA__) Intersection *hits = kg->hits_stack; # else Intersection hits_stack[SHADOW_STACK_MAX_HITS]; @@ -239,8 +299,10 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, # endif /* __KERNEL_GPU__ */ /* Invoke actual traversal. */ return shadow_blocked_transparent_all_loop(kg, + sd, shadow_sd, state, + visibility, ray, hits, max_hits, @@ -248,7 +310,7 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, } # endif /* __SHADOW_RECORD_ALL__ */ -# ifdef __KERNEL_GPU__ +# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__) /* Shadow function to compute how much light is blocked, * * Here we raytrace from one transparent surface to the next step by step. @@ -263,20 +325,32 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, */ ccl_device bool shadow_blocked_transparent_stepped_loop( KernelGlobals *kg, + ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, + const uint visibility, Ray *ray, Intersection *isect, const bool blocked, const bool is_transparent_isect, float3 *shadow) { +# ifdef __VOLUME__ + VolumeState volume_state; +# endif if(blocked && is_transparent_isect) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 Pend = ray->P + ray->D*ray->t; int bounce = state->transparent_bounce; # ifdef __VOLUME__ - PathState ps = *state; +# ifdef __SPLIT_KERNEL__ + ccl_addr_space +# endif + PathState *ps = shadow_blocked_volume_path_state(kg, + &volume_state, + state, + sd, + ray); # endif for(;;) { if(bounce >= kernel_data.integrator.transparent_max_bounce) { @@ -284,7 +358,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( } if(!scene_intersect(kg, *ray, - PATH_RAY_SHADOW_TRANSPARENT, + visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) @@ -299,7 +373,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( shadow_sd, state, #ifdef __VOLUME__ - &ps, + ps, #endif isect, ray, @@ -308,7 +382,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( return true; } /* Move ray forward. */ - ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); + ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } @@ -316,8 +390,8 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( } # ifdef __VOLUME__ /* Attenuation for last line segment towards light. */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + if(ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } # endif *shadow *= throughput; @@ -326,7 +400,15 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( # ifdef __VOLUME__ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ - kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); +# ifdef __SPLIT_KERNEL__ + ccl_addr_space +# endif + PathState *ps = shadow_blocked_volume_path_state(kg, + &volume_state, + state, + sd, + ray); + kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); } # endif return blocked; @@ -334,24 +416,28 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( ccl_device bool shadow_blocked_transparent_stepped( KernelGlobals *kg, + ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, + const uint visibility, Ray *ray, Intersection *isect, float3 *shadow) { - const bool blocked = scene_intersect(kg, - *ray, - PATH_RAY_SHADOW_OPAQUE, - isect, - NULL, - 0.0f, 0.0f); - const bool is_transparent_isect = blocked - ? shader_transparent_shadow(kg, isect) - : false; + bool blocked = scene_intersect(kg, + *ray, + visibility & PATH_RAY_SHADOW_OPAQUE, + isect, + NULL, + 0.0f, 0.0f); + bool is_transparent_isect = blocked + ? shader_transparent_shadow(kg, isect) + : false; return shadow_blocked_transparent_stepped_loop(kg, + sd, shadow_sd, state, + visibility, ray, isect, blocked, @@ -359,32 +445,30 @@ ccl_device bool shadow_blocked_transparent_stepped( shadow); } -# endif /* __KERNEL_GPU__ */ +# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, + ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, - ccl_addr_space Ray *ray_input, + Ray *ray_input, float3 *shadow) { - /* Special trickery for split kernel: some data is coming from the - * global memory. - */ -#ifdef __SPLIT_KERNEL__ - Ray private_ray = *ray_input; - Ray *ray = &private_ray; - Intersection *isect = &kg->isect_shadow[SD_THREAD]; -#else /* __SPLIT_KERNEL__ */ Ray *ray = ray_input; - Intersection isect_object; - Intersection *isect = &isect_object; -#endif /* __SPLIT_KERNEL__ */ + Intersection isect; /* Some common early checks. */ *shadow = make_float3(1.0f, 1.0f, 1.0f); if(ray->t == 0.0f) { return false; } +#ifdef __SHADOW_TRICKS__ + const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) + ? PATH_RAY_SHADOW_NON_CATCHER + : PATH_RAY_SHADOW; +#else + const uint visibility = PATH_RAY_SHADOW; +#endif /* Do actual shadow shading. */ /* First of all, we check if integrator requires transparent shadows. * if not, we use simplest and fastest ever way to calculate occlusion. @@ -396,8 +480,9 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, return shadow_blocked_opaque(kg, shadow_sd, state, + visibility, ray, - isect, + &isect, shadow); } #ifdef __TRANSPARENT_SHADOWS__ @@ -422,39 +507,45 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, */ const bool blocked = scene_intersect(kg, *ray, - PATH_RAY_SHADOW_OPAQUE, - isect, + visibility & PATH_RAY_SHADOW_OPAQUE, + &isect, NULL, 0.0f, 0.0f); const bool is_transparent_isect = blocked - ? shader_transparent_shadow(kg, isect) + ? shader_transparent_shadow(kg, &isect) : false; if(!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { return shadow_blocked_transparent_stepped_loop(kg, + sd, shadow_sd, state, + visibility, ray, - isect, + &isect, blocked, is_transparent_isect, shadow); } # endif /* __KERNEL_GPU__ */ return shadow_blocked_transparent_all(kg, + sd, shadow_sd, state, + visibility, ray, max_hits, shadow); # else /* __SHADOW_RECORD_ALL__ */ /* Fallback to a slowest version which works on all devices. */ return shadow_blocked_transparent_stepped(kg, + sd, shadow_sd, state, + visibility, ray, - isect, + &isect, shadow); # endif /* __SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ |