/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

CCL_NAMESPACE_BEGIN

/* Fold the attenuation caused by one transparent intersection into
 * *throughput.
 *
 * With __VOLUME__ enabled, the segment of the ray leading up to the hit is
 * first attenuated by the volume on top of volume_state's stack, and after
 * shading the hit that stack is updated for the surface being crossed.
 *
 * shadow_sd is overwritten with the shader data of the hit.
 *
 * Returns true when the throughput became zero, meaning the caller can stop
 * traversing; false otherwise.
 */
ccl_device_forceinline bool shadow_handle_transparent_isect(
        KernelGlobals *kg,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
#    ifdef __VOLUME__
        ccl_addr_space struct PathState *volume_state,
#    endif
        Intersection *isect,
        Ray *ray,
        float3 *throughput)
{
#ifdef __VOLUME__
	/* Volume attenuation along the stretch between the previous surface
	 * and this one. A copy of the ray is shortened to end at the hit.
	 */
	const bool inside_volume =
	        (volume_state->volume_stack[0].shader != SHADER_NONE);
	if(inside_volume) {
		Ray volume_segment = *ray;
		volume_segment.t = isect->t;
		kernel_volume_shadow(kg, shadow_sd, volume_state, &volume_segment, throughput);
	}
#endif
	/* Fill in shader data for the surface which was hit. */
	shader_setup_from_ray(kg, shadow_sd, isect, ray);
	/* Surface attenuation. Hits flagged SD_HAS_ONLY_VOLUME do not get
	 * their surface shader evaluated.
	 */
	const bool has_surface_shader = !(shadow_sd->flag & SD_HAS_ONLY_VOLUME);
	if(has_surface_shader) {
		/* The bounce state is modified only for the duration of the
		 * shader evaluation and reverted right after.
		 */
		path_state_modify_bounce(state, true);
		shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f,
		                    PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
		path_state_modify_bounce(state, false);
		*throughput *= shader_bsdf_transparency(kg, shadow_sd);
	}
	/* No light left, nothing more to trace. */
	if(is_zero(*throughput)) {
		return true;
	}
#ifdef __VOLUME__
	/* Update the volume stack for the surface we are passing through. */
	kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
#endif
	return false;
}

/* Opaque-only shadow test.
 *
 * Casts a single PATH_RAY_SHADOW_OPAQUE ray and reports whether anything was
 * hit; the intersection is stored in *isect. When nothing was hit and
 * __VOLUME__ is enabled, the volume on top of the state's volume stack (if
 * any) is given a chance to attenuate *shadow along the ray.
 *
 * Returns true when the ray is blocked.
 */
ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
                                      ShaderData *shadow_sd,
                                      ccl_addr_space PathState *state,
                                      Ray *ray,
                                      Intersection *isect,
                                      float3 *shadow)
{
	const bool is_occluded = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE,
	                                         isect, NULL, 0.0f, 0.0f);
#ifdef __VOLUME__
	if(!is_occluded) {
		/* Ray reaches the light, but may still travel through a volume. */
		if(state->volume_stack[0].shader != SHADER_NONE) {
			kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
		}
	}
#endif
	return is_occluded;
}

#ifdef __TRANSPARENT_SHADOWS__
#  ifdef __SHADOW_RECORD_ALL__
/* Shadow function to compute how much light is blocked,
 *
 * We trace a single ray. If it hits any opaque surface, or more than a given
 * number of transparent surfaces is hit, then we consider the geometry to be
 * entirely blocked. If not, all transparent surfaces will be recorded and we
 * will shade them one by one to determine how much light is blocked. This all
 * happens in one scene intersection function.
 *
 * Recording all hits works well in some cases but may be slower in others. If
 * we have many semi-transparent hairs, one intersection may be faster because
 * you'd be re-intersecting the same hairs a lot with each step otherwise. If
 * however there is mostly binary transparency then we may be recording many
 * unnecessary intersections when one of the first surfaces blocks all light.
 *
 * From tests in real scenes it seems the performance loss is either minimal,
 * or there is a performance increase anyway due to avoiding the need to send
 * two rays with transparent shadows.
 *
 * On CPU it'll handle all transparent bounces (by allocating storage for
 * intersections when they don't fit into the stack storage).
 *
 * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
 * is something to be kept an eye on.
 */

/* Number of Intersection entries in the fixed-size hit buffers declared by
 * shadow_blocked_transparent_all().
 *
 * On CPU a heap buffer is used instead once max_hits + 1 exceeds this value.
 * On GPU shadow_blocked() switches to the stepped traversal once
 * max_hits + 1 reaches it.
 */
#    define SHADOW_STACK_MAX_HITS 64

/* Actual logic with traversal loop implementation which is free from device
 * specific tweaks.
 *
 * All hits along the ray are gathered with one scene_intersect_shadow_all()
 * call, sorted, and then shaded one after another.
 *
 * kg, shadow_sd - kernel globals and scratch shader data; shadow_sd is
 *                 overwritten for every shaded hit.
 * state         - path state of the path casting the shadow ray. With
 *                 __VOLUME__ a copy of it is used for volume stack tracking,
 *                 so the volume stack of state itself is not modified here.
 * skip_object   - forwarded to scene_intersect_shadow_all().
 * ray           - shadow ray. NOTE: P, D and t are modified while stepping
 *                 from hit to hit.
 * hits          - storage for recorded intersections. Note that hits array
 *                 should be as big as max_hits+1. The t of every entry
 *                 visited by the loop is rewritten relative to the
 *                 previously shaded hit.
 * max_hits      - forwarded to scene_intersect_shadow_all().
 * shadow        - when transparent hits were shaded this is overwritten (not
 *                 multiplied) with the accumulated throughput. When nothing
 *                 was hit it is only touched by kernel_volume_shadow().
 *
 * Returns true if the light is fully blocked.
 */
ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                                                    ShaderData *shadow_sd,
                                                    ccl_addr_space PathState *state,
                                                    const int skip_object,
                                                    Ray *ray,
                                                    Intersection *hits,
                                                    uint max_hits,
                                                    float3 *shadow)
{
	/* Intersect to find an opaque surface, or record all transparent
	 * surface hits.
	 *
	 * The counter is initialized so the check below never reads an
	 * indeterminate value, regardless of what the intersection function
	 * does on its early-out paths.
	 */
	uint num_hits = 0;
	const bool blocked = scene_intersect_shadow_all(kg,
	                                                ray,
	                                                hits,
	                                                skip_object,
	                                                max_hits,
	                                                &num_hits);
	/* If no opaque surface found but we did find transparent hits,
	 * shade them.
	 */
	if(!blocked && num_hits > 0) {
		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
		/* End point of the original ray, used to re-aim the ray after
		 * its origin was moved to a hit.
		 */
		float3 Pend = ray->P + ray->D*ray->t;
		float last_t = 0.0f;
		Intersection *isect = hits;
#    ifdef __VOLUME__
		/* Separate path state for tracking the volume stack along the
		 * shadow ray.
		 */
#      ifdef __SPLIT_KERNEL__
		ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#      else
		PathState ps_object;
		PathState *ps = &ps_object;
#      endif
		*ps = *state;
#    endif
		sort_intersections(hits, num_hits);
		/* Index type matches num_hits to avoid a signed/unsigned
		 * comparison.
		 */
		for(uint hit = 0; hit < num_hits; hit++, isect++) {
			/* Adjust intersection distance for moving ray forward. */
			float new_t = isect->t;
			isect->t -= last_t;
			/* Skip hit if we did not move forward, step by step raytracing
			 * would have skipped it as well then.
			 */
			if(last_t == new_t) {
				continue;
			}
			last_t = new_t;
			/* Attenuate the throughput. */
			if(shadow_handle_transparent_isect(kg,
			                                   shadow_sd,
			                                   state,
#    ifdef __VOLUME__
			                                   ps,
#    endif
			                                   isect,
			                                   ray,
			                                   &throughput))
			{
				return true;
			}
			/* Move ray forward. Rays with infinite length keep their
			 * direction, there is no finite end point to aim at.
			 */
			ray->P = shadow_sd->P;
			if(ray->t != FLT_MAX) {
				ray->D = normalize_len(Pend - ray->P, &ray->t);
			}
		}
#    ifdef __VOLUME__
		/* Attenuation for last line segment towards light. */
		if(ps->volume_stack[0].shader != SHADER_NONE) {
			kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
		}
#    endif
		*shadow = throughput;
		return is_zero(throughput);
	}
#    ifdef __VOLUME__
	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
		/* Apply attenuation from current volume shader. */
		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
	}
#    endif
	return blocked;
}

/* Here we do all device specific trickery before invoking actual traversal
 * loop to help readability of the actual logic.
 *
 * This function only picks the storage for recorded intersections and then
 * forwards all arguments to shadow_blocked_transparent_all_loop():
 *
 * - Split kernel and regular CPU builds start with a local array of
 *   SHADOW_STACK_MAX_HITS entries.
 * - CUDA uses kg->hits_stack.
 * - Non-GPU builds switch to the heap buffer cached in
 *   kg->transparent_shadow_intersections when max_hits + 1 entries do not
 *   fit into the local array. The buffer is allocated on first use and is
 *   not freed here.
 *
 * Returns true if the light is fully blocked.
 */
ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
                                               ShaderData *shadow_sd,
                                               ccl_addr_space PathState *state,
                                               const int skip_object,
                                               Ray *ray,
                                               uint max_hits,
                                               float3 *shadow)
{
#    ifdef __SPLIT_KERNEL__
	Intersection hits_[SHADOW_STACK_MAX_HITS];
	Intersection *hits = &hits_[0];
#    elif defined(__KERNEL_CUDA__)
	Intersection *hits = kg->hits_stack;
#    else
	Intersection hits_stack[SHADOW_STACK_MAX_HITS];
	Intersection *hits = hits_stack;
#    endif
#    ifndef __KERNEL_GPU__
	/* Prefer to use stack but use dynamic allocation if too deep max hits
	 * we need max_hits + 1 storage space due to the logic in
	 * scene_intersect_shadow_all which will first store and then check if
	 * the limit is exceeded.
	 *
	 * Ignore this on GPU because of slow/unavailable malloc().
	 */
	if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
		if(kg->transparent_shadow_intersections == NULL) {
			/* Sized for the deepest possible request rather than for
			 * this particular max_hits, so the buffer can be reused.
			 * NOTE(review): assumes max_hits never exceeds
			 * transparent_max_bounce; holds for shadow_blocked(),
			 * confirm for any other caller.
			 */
			const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
			kg->transparent_shadow_intersections =
				(Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
		}
		if(kg->transparent_shadow_intersections != NULL) {
			hits = kg->transparent_shadow_intersections;
		}
		else {
			/* Allocation failed. Instead of handing a NULL buffer to
			 * the traversal, keep the stack storage and lower the hit
			 * limit to what fits into it, same as the limit GPU
			 * devices work with. The next call will retry the
			 * allocation.
			 */
			max_hits = SHADOW_STACK_MAX_HITS - 1;
		}
	}
#    endif  /* __KERNEL_GPU__ */
	/* Invoke actual traversal. */
	return shadow_blocked_transparent_all_loop(kg,
	                                           shadow_sd,
	                                           state,
	                                           skip_object,
	                                           ray,
	                                           hits,
	                                           max_hits,
	                                           shadow);
}
#  endif  /* __SHADOW_RECORD_ALL__ */

#  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
/* Shadow function to compute how much light is blocked,
 *
 * Here we raytrace from one transparent surface to the next step by step.
 * To minimize overhead in cases where we don't need transparent shadows, we
 * first trace a regular shadow ray. We check if the hit primitive was
 * potentially transparent, and only in that case start marching. This gives
 * one extra ray cast for the cases where we do want transparency.
 */

/* Device-independent part of the stepped transparent shadow traversal.
 *
 * The caller is expected to have done the precalculation already: blocked
 * and is_transparent_isect describe the outcome of an initial opaque shadow
 * ray (with the hit stored in *isect).
 *
 * Marching from surface to surface only happens when that initial hit is a
 * transparent one, or when an object has to be skipped. In every other case
 * the initial result is returned, after letting the current volume (if any)
 * attenuate *shadow for rays which were not blocked.
 *
 * While marching, ray and *isect are modified, and *shadow is multiplied
 * with the accumulated throughput at the end.
 *
 * Returns true if the light is fully blocked.
 */
ccl_device bool shadow_blocked_transparent_stepped_loop(
        KernelGlobals *kg,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
        const int skip_object,
        Ray *ray,
        Intersection *isect,
        const bool blocked,
        const bool is_transparent_isect,
        float3 *shadow)
{
	const bool needs_marching = (skip_object != OBJECT_NONE) ||
	                            (blocked && is_transparent_isect);
	if(!needs_marching) {
#    ifdef __VOLUME__
		if(!blocked) {
			/* Nothing in the way, only the current volume can
			 * attenuate the light.
			 */
			if(state->volume_stack[0].shader != SHADER_NONE) {
				kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
			}
		}
#    endif
		return blocked;
	}

	/* Remember where the ray ends, so it can be re-aimed at that point
	 * after its origin is moved.
	 */
	const float3 ray_end = ray->P + ray->D*ray->t;
	float3 transmittance = make_float3(1.0f, 1.0f, 1.0f);
	int num_bounces = state->transparent_bounce;
#    ifdef __VOLUME__
	/* Volume stack is tracked on a copy of the path state. */
#      ifdef __SPLIT_KERNEL__
	ccl_addr_space PathState *volume_ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#      else
	PathState volume_ps_storage;
	PathState *volume_ps = &volume_ps_storage;
#      endif
	*volume_ps = *state;
#    endif
	for(;;) {
		/* Out of transparent bounces, consider the light blocked. */
		if(num_bounces >= kernel_data.integrator.transparent_max_bounce) {
			return true;
		}
		const bool found_hit = scene_intersect(kg,
		                                       *ray,
		                                       PATH_RAY_SHADOW_TRANSPARENT,
		                                       isect,
		                                       NULL,
		                                       0.0f, 0.0f);
		if(!found_hit) {
			break;
		}
		bool is_skipped_hit = false;
#    ifdef __SHADOW_TRICKS__
		if(skip_object != OBJECT_NONE) {
			/* Figure out which object the hit belongs to. */
			int isect_object = isect->object;
			if(isect_object == PRIM_NONE) {
				isect_object = kernel_tex_fetch(__prim_object, isect->prim);
			}
			if(isect_object == skip_object) {
				/* Hit is passed through without shading; shader data
				 * is only needed for moving the ray forward.
				 */
				shader_setup_from_ray(kg, shadow_sd, isect, ray);
				is_skipped_hit = true;
			}
		}
#    endif
		if(!is_skipped_hit) {
			/* An opaque surface blocks everything. */
			if(!shader_transparent_shadow(kg, isect)) {
				return true;
			}
			/* Attenuate, and stop once no light is left. */
			if(shadow_handle_transparent_isect(kg,
			                                   shadow_sd,
			                                   state,
#    ifdef __VOLUME__
			                                   volume_ps,
#    endif
			                                   isect,
			                                   ray,
			                                   &transmittance))
			{
				return true;
			}
		}
		/* Continue from the far side of the surface. Rays of infinite
		 * length keep their direction.
		 */
		ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
		if(ray->t != FLT_MAX) {
			ray->D = normalize_len(ray_end - ray->P, &ray->t);
		}
		num_bounces++;
	}
#    ifdef __VOLUME__
	/* Remaining segment between the last surface and the light. */
	if(volume_ps->volume_stack[0].shader != SHADER_NONE) {
		kernel_volume_shadow(kg, shadow_sd, volume_ps, ray, &transmittance);
	}
#    endif
	*shadow *= transmittance;
	return is_zero(transmittance);
}

/* Stepped transparent shadows: does the precalculation needed by
 * shadow_blocked_transparent_stepped_loop() and then invokes it.
 *
 * When no object has to be skipped, a regular opaque shadow ray is traced
 * first and the hit (if any) is checked for being transparent. When there is
 * an object to skip, that ray is not traced and the loop is invoked as if
 * nothing was hit.
 *
 * Returns true if the light is fully blocked.
 */
ccl_device bool shadow_blocked_transparent_stepped(
        KernelGlobals *kg,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
        const int skip_object,
        Ray *ray,
        Intersection *isect,
        float3 *shadow)
{
	bool blocked = false;
	bool is_transparent_isect = false;
	if(skip_object == OBJECT_NONE) {
		blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE,
		                          isect, NULL, 0.0f, 0.0f);
		if(blocked) {
			is_transparent_isect = shader_transparent_shadow(kg, isect);
		}
	}
	return shadow_blocked_transparent_stepped_loop(kg, shadow_sd, state,
	                                               skip_object,
	                                               ray, isect,
	                                               blocked,
	                                               is_transparent_isect,
	                                               shadow);
}

#  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
#endif /* __TRANSPARENT_SHADOWS__ */

/* Shadow ray entry point.
 *
 * Resets *shadow to white and then dispatches to one of the implementations
 * above, depending on enabled kernel features, integrator settings and the
 * device.
 *
 * Returns true if the light is fully blocked. Rays of zero length are never
 * blocked.
 */
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                      ShaderData *shadow_sd,
                                      ccl_addr_space PathState *state,
                                      Ray *ray_input,
                                      float3 *shadow)
{
	Intersection isect;
	/* Checks shared by all code paths. */
	*shadow = make_float3(1.0f, 1.0f, 1.0f);
	if(ray_input->t == 0.0f) {
		return false;
	}
#ifdef __SHADOW_TRICKS__
	const int skip_object = state->catcher_object;
#else
	const int skip_object = OBJECT_NONE;
#endif
	/* Without transparent shadows in the integrator the plain opaque test
	 * is all that is needed, and it is the cheapest one.
	 *
	 * NOTE: The shortcut can not be taken on a shadow-catcher path, since
	 * the catcher object is not supposed to cast a shadow there.
	 */
#ifdef __TRANSPARENT_SHADOWS__
	if(skip_object == OBJECT_NONE &&
	   !kernel_data.integrator.transparent_shadows)
#endif
	{
		return shadow_blocked_opaque(kg, shadow_sd, state,
		                             ray_input, &isect, shadow);
	}
#ifdef __TRANSPARENT_SHADOWS__
#  ifdef __SHADOW_RECORD_ALL__
	/* Record-all traversal is preferred on devices which support it. */
	const int bounce_limit = kernel_data.integrator.transparent_max_bounce;
	/* The transparent bounce limit is checked here as well, because volume
	 * scattering can do lighting before path termination is checked at
	 * a surface.
	 */
	if(state->transparent_bounce >= bounce_limit) {
		return true;
	}
	const uint max_hits = bounce_limit - state->transparent_bounce - 1;
#    ifdef __KERNEL_GPU__
	/* On GPU we do trickery with tracing opaque ray first, this avoids
	 * speed regressions in some files.
	 *
	 * TODO(sergey): Check why using record-all behavior causes slowdown in
	 * such cases. Could that be caused by a higher spill pressure?
	 */
	const bool blocked = scene_intersect(kg, *ray_input, PATH_RAY_SHADOW_OPAQUE,
	                                     &isect, NULL, 0.0f, 0.0f);
	const bool is_transparent_isect =
	        blocked && shader_transparent_shadow(kg, &isect);
	/* Record-all is only used for a transparent hit whose worst case
	 * amount of intersections fits into the fixed-size hits storage.
	 */
	const bool use_record_all = blocked &&
	                            is_transparent_isect &&
	                            (max_hits + 1 < SHADOW_STACK_MAX_HITS);
	if(!use_record_all) {
		return shadow_blocked_transparent_stepped_loop(kg, shadow_sd, state,
		                                               skip_object,
		                                               ray_input, &isect,
		                                               blocked,
		                                               is_transparent_isect,
		                                               shadow);
	}
#    endif  /* __KERNEL_GPU__ */
	return shadow_blocked_transparent_all(kg, shadow_sd, state,
	                                      skip_object,
	                                      ray_input,
	                                      max_hits,
	                                      shadow);
#  else  /* __SHADOW_RECORD_ALL__ */
	/* Slowest variant, but one which works on every device. */
	return shadow_blocked_transparent_stepped(kg, shadow_sd, state,
	                                          skip_object,
	                                          ray_input, &isect,
	                                          shadow);
#  endif  /* __SHADOW_RECORD_ALL__ */
#endif  /* __TRANSPARENT_SHADOWS__ */
}

#undef SHADOW_STACK_MAX_HITS

CCL_NAMESPACE_END