diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-12-16 18:39:31 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-12-25 00:50:49 +0300 |
commit | 345ed4dd105aca5dbe6fbc3936ef2af83c16544b (patch) | |
tree | 9232db8e0c09ba8965d9a6f732199b04285216ad /intern/cycles/kernel | |
parent | f4df3ec05a61dcebdcc0faa97ead8f7a1d8af71c (diff) |
Cycles: Don't do node visibility check in subsurface and volume traversal
Visibility flags are set to all visibility anyway, so there was no reason
to perform that test.
TODO: We need to investigate whether having primitive intersection functions
which don't do the visibility check gives any speedup here as well.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_subsurface.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_volume.h | 12 |
2 files changed, 0 insertions, 25 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index a8f57cffa78..5f1bd41d789 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -54,7 +54,6 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio int object = OBJECT_NONE; float isect_t = ray->t; - const uint visibility = PATH_RAY_ALL_VISIBILITY; uint num_hits = 0; #if FEATURE(BVH_MOTION) @@ -118,14 +117,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (c0max >= c0min); traverseChild1 = (c1max >= c1min); -#endif #else // __KERNEL_SSE2__ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ @@ -145,14 +138,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? 
*/ - traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (movemask(lrhit) & 1); traverseChild1 = (movemask(lrhit) & 2); -#endif #endif // __KERNEL_SSE2__ nodeAddr = __float_as_int(cnodes.x); diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h index 16c16beee39..d9425caf7ee 100644 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ b/intern/cycles/kernel/geom/geom_bvh_volume.h @@ -121,14 +121,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (c0max >= c0min); traverseChild1 = (c1max >= c1min); -#endif #else // __KERNEL_SSE2__ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ @@ -149,14 +143,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (movemask(lrhit) & 1); traverseChild1 = (movemask(lrhit) & 2); -#endif #endif // __KERNEL_SSE2__ nodeAddr = __float_as_int(cnodes.x); |