Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/kernel/bvh/bvh.h22
-rw-r--r--intern/cycles/kernel/kernel_shadow.h98
2 files changed, 70 insertions, 50 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 36798982653..2667f236064 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -357,7 +357,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
#endif
}
-#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
+#if defined(__SHADOW_RECORD_ALL__) || defined(__VOLUME_RECORD_ALL__)
/* ToDo: Move to another file? */
ccl_device int intersections_compare(const void *a, const void *b)
{
@@ -371,7 +371,25 @@ ccl_device int intersections_compare(const void *a, const void *b)
else
return 0;
}
+
+ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
+{
+#ifdef __KERNEL_GPU__
+ /* Use bubble sort which has more friendly memory pattern on GPU. */
+ int i, j;
+ for(i = 0; i < num_hits; ++i) {
+ for(j = 0; j < num_hits - 1; ++j) {
+ if(hits[j].t < hits[j + 1].t) {
+ Intersection tmp = hits[j];
+ hits[j] = hits[j + 1];
+ hits[j + 1] = tmp;
+ }
+ }
+ }
+#else
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
#endif
+}
+#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index b74902f8dd7..05a6c7d1827 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -34,10 +34,8 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg,
kernel_volume_shadow(kg, shadow_sd, state, &segment_ray, throughput);
}
#endif
-
/* Setup shader data at surface. */
shader_setup_from_ray(kg, shadow_sd, isect, ray);
-
/* Attenuation from transparent surface. */
if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
@@ -51,42 +49,19 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg,
path_state_modify_bounce(state, false);
*throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
-
/* Stop if all light is blocked. */
if(is_zero(*throughput)) {
return true;
}
-
#ifdef __VOLUME__
/* Exit/enter volume. */
kernel_volume_stack_enter_exit(kg, shadow_sd, state->volume_stack);
#endif
-
return false;
}
#ifdef __SHADOW_RECORD_ALL__
-
-ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
-{
-#ifdef __KERNEL_GPU__
- /* Use bubble sort which has more friendly memory pattern on GPU. */
- int i, j;
- for(i = 0; i < num_hits; ++i) {
- for(j = 0; j < num_hits - 1; ++j) {
- if(hits[j].t < hits[j + 1].t) {
- Intersection tmp = hits[j];
- hits[j] = hits[j + 1];
- hits[j + 1] = tmp;
- }
- }
- }
-#else
- qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-#endif
-}
-
-/* Shadow function to compute how much light is blocked, CPU variation.
+/* Shadow function to compute how much light is blocked,
*
* We trace a single ray. If it hits any opaque surface, or more than a given
* number of transparent surfaces is hit, then we consider the geometry to be
@@ -104,12 +79,20 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
* or there is a performance increase anyway due to avoiding the need to send
* two rays with transparent shadows.
*
- * This is CPU only because of qsort, and malloc or high stack space usage to
- * record all these intersections. */
+ * On CPU it'll handle all transparent bounces (by allocating storage for
+ * intersections when they don't fit into the stack storage).
+ *
+ * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
+ * is something to be kept an eye on.
+ */
-#define STACK_MAX_HITS 64
+#define SHADOW_STACK_MAX_HITS 64
-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked_all(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ PathState *state,
+ Ray *ray,
+ float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
if(ray->t == 0.0f) {
@@ -126,7 +109,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd,
/* Intersect to find an opaque surface, or record all transparent
* surface hits.
*/
- Intersection hits_stack[STACK_MAX_HITS];
+ Intersection hits_stack[SHADOW_STACK_MAX_HITS];
Intersection *hits = hits_stack;
const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
@@ -138,7 +121,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd,
*
* Ignore this on GPU because of slow/unavailable malloc().
*/
- if(max_hits + 1 > STACK_MAX_HITS) {
+ if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
if(kg->transparent_shadow_intersections == NULL) {
kg->transparent_shadow_intersections =
(Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
@@ -211,30 +194,27 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd,
#endif
return blocked;
}
+#endif /* __SHADOW_RECORD_ALL__ */
-#undef STACK_MAX_HITS
-
-#else
-
-/* Shadow function to compute how much light is blocked, GPU variation.
+#ifndef __KERNEL_CPU__
+/* Shadow function to compute how much light is blocked,
*
* Here we raytrace from one transparent surface to the next step by step.
* To minimize overhead in cases where we don't need transparent shadows, we
* first trace a regular shadow ray. We check if the hit primitive was
* potentially transparent, and only in that case start marching. this gives
- * one extra ray cast for the cases were we do want transparency. */
-
-ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray_input,
- float3 *shadow)
+ * one extra ray cast for the cases were we do want transparency.
+ */
+ccl_device_noinline bool shadow_blocked_stepped(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray_input,
+ float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
-
- if(ray_input->t == 0.0f)
+ if(ray_input->t == 0.0f) {
return false;
-
+ }
#ifdef __SPLIT_KERNEL__
Ray private_ray = *ray_input;
Ray *ray = &private_ray;
@@ -313,10 +293,32 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
}
# endif
#endif
-
return blocked;
}
+#endif /* __KERNEL_CPU__ */
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ PathState *state,
+ Ray *ray,
+ float3 *shadow)
+{
+#ifdef __SHADOW_RECORD_ALL__
+# ifdef __KERNEL_CPU__
+ return shadow_blocked_all(kg, shadow_sd, state, ray, shadow);
+# else
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
+ if(max_hits + 1 < SHADOW_STACK_MAX_HITS) {
+ return shadow_blocked_all(kg, shadow_sd, state, ray, shadow);
+ }
+# endif
+#endif
+#ifndef __KERNEL_CPU__
+ return shadow_blocked_stepped(kg, shadow_sd, state, ray, shadow);
#endif
+}
+
+#undef SHADOW_STACK_MAX_HITS
CCL_NAMESPACE_END