Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2017-01-30 16:11:58 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2017-02-08 16:00:48 +0300
commitda31a8283228eef0e83d4aa76d71ff57527cbab6 (patch)
treebe8ef36bab70cbb546b94c757ac4015ccd5933b8 /intern/cycles/kernel/kernel_shadow.h
parent04cf1538b5900f9f7eda1c338f15a8a5fe662529 (diff)
Cycles: Solve speed regression by casting opaque ray first
Diffstat (limited to 'intern/cycles/kernel/kernel_shadow.h')
-rw-r--r--intern/cycles/kernel/kernel_shadow.h97
1 files changed, 77 insertions, 20 deletions
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 8c8758269cd..73c5dbd1d2a 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -247,22 +247,21 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
* potentially transparent, and only in that case start marching. this gives
* one extra ray cast for the cases were we do want transparency.
*/
-ccl_device bool shadow_blocked_transparent_stepped(
+
+/* This function only implements the device-independent traversal logic,
+ * which requires some precalculation to have been done by the caller.
+ */
+ccl_device bool shadow_blocked_transparent_stepped_loop(
KernelGlobals *kg,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
Ray *ray,
Intersection *isect,
+ const bool blocked,
+ const bool is_transparent_isect,
float3 *shadow)
{
- /* Early check for opaque shadows. */
- const bool blocked = scene_intersect(kg,
- *ray,
- PATH_RAY_SHADOW_OPAQUE,
- isect,
- NULL,
- 0.0f, 0.0f);
- if(blocked && shader_transparent_shadow(kg, isect)) {
+ if(blocked && is_transparent_isect) {
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
float3 Pend = ray->P + ray->D*ray->t;
int bounce = state->transparent_bounce;
@@ -319,6 +318,34 @@ ccl_device bool shadow_blocked_transparent_stepped(
# endif
return blocked;
}
+
+ccl_device bool shadow_blocked_transparent_stepped(
+ KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ Intersection *isect,
+ float3 *shadow)
+{
+ const bool blocked = scene_intersect(kg,
+ *ray,
+ PATH_RAY_SHADOW_OPAQUE,
+ isect,
+ NULL,
+ 0.0f, 0.0f);
+ const bool is_transparent_isect = blocked
+ ? shader_transparent_shadow(kg, isect)
+ : false;
+ return shadow_blocked_transparent_stepped_loop(kg,
+ shadow_sd,
+ state,
+ ray,
+ isect,
+ blocked,
+ is_transparent_isect,
+ shadow);
+}
+
# endif /* __KERNEL_GPU__ */
#endif /* __TRANSPARENT_SHADOWS__ */
@@ -346,6 +373,9 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
return false;
}
/* Do actual shadow shading. */
+ /* First of all, we check whether the integrator requires transparent
+ * shadows. If not, we use the simplest and fastest way to calculate occlusion.
+ */
#ifdef __TRANSPARENT_SHADOWS__
if(!kernel_data.integrator.transparent_shadows)
#endif
@@ -359,6 +389,9 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
}
#ifdef __TRANSPARENT_SHADOWS__
# ifdef __SHADOW_RECORD_ALL__
+ /* For transparent shadows we try to use the record-all logic on the
+ * devices which support it.
+ */
const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
/* Check transparent bounces here, for volume scatter which can do
* lighting before surface path termination is checked.
@@ -368,25 +401,49 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
}
const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
# ifdef __KERNEL_GPU__
- if(max_hits + 1 < SHADOW_STACK_MAX_HITS)
-# endif
+ /* On GPU we use the trick of tracing an opaque ray first; this avoids speed
+ * regressions in some files.
+ *
+ * TODO(sergey): Check why using the record-all behavior causes a slowdown in
+ * such cases. Could that be caused by a higher spill pressure?
+ */
+ const bool blocked = scene_intersect(kg,
+ *ray,
+ PATH_RAY_SHADOW_OPAQUE,
+ isect,
+ NULL,
+ 0.0f, 0.0f);
+ const bool is_transparent_isect = blocked
+ ? shader_transparent_shadow(kg, isect)
+ : false;
+ if(!blocked || !is_transparent_isect ||
+ max_hits + 1 >= SHADOW_STACK_MAX_HITS)
{
- return shadow_blocked_transparent_all(kg,
- shadow_sd,
- state,
- ray,
- max_hits,
- shadow);
+ return shadow_blocked_transparent_stepped_loop(kg,
+ shadow_sd,
+ state,
+ ray,
+ isect,
+ blocked,
+ is_transparent_isect,
+ shadow);
}
-# endif /* __SHADOW_RECORD_ALL__ */
-# ifdef __KERNEL_GPU__
+# endif /* __KERNEL_GPU__ */
+ return shadow_blocked_transparent_all(kg,
+ shadow_sd,
+ state,
+ ray,
+ max_hits,
+ shadow);
+# else /* __SHADOW_RECORD_ALL__ */
+ /* Fall back to the slowest version, which works on all devices. */
return shadow_blocked_transparent_stepped(kg,
shadow_sd,
state,
ray,
isect,
shadow);
-# endif /* __KERNEL_GPU__ */
+# endif /* __SHADOW_RECORD_ALL__ */
#endif /* __TRANSPARENT_SHADOWS__ */
}