Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2014-06-13 23:13:18 +0400
committerThomas Dinges <blender@dingto.org>2014-06-13 23:59:12 +0400
commitcd5e1ff74e4f6443f3e4b836dd23fe46b56cb7ed (patch)
tree578ee132eab87d348147e49c91e1929660558c20 /intern/cycles/kernel/geom/geom_bvh_subsurface.h
parentd0573ce9054e325c0ad2fbb943087e0f8b9e159a (diff)
Cycles Refactor: Add SSE Utility code from Embree for cleaner SSE code.
This makes the code a bit easier to understand, and might come in handy if we want to reuse more Embree code. Differential Revision: https://developer.blender.org/D482 Code by Brecht, with fixes by Lockal, Sergey and myself.
Diffstat (limited to 'intern/cycles/kernel/geom/geom_bvh_subsurface.h')
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h54
1 files changed, 27 insertions, 27 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a19f05dd371..a8f57cffa78 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -65,15 +65,15 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0));
- __m128 Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
shuffle_swap_t shufflexyz[3];
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -131,25 +131,27 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
/* fetch node data */
- const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
const float4 cnodes = ((float4*)bvh_nodes)[3];
/* intersect ray against child nodes */
- const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
- const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]);
- const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]);
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
- const __m128 tminmax = _mm_xor_ps(_mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)), pn);
- const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+ traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
#else
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
#endif
#endif // __KERNEL_SSE2__
@@ -161,9 +163,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#if !defined(__KERNEL_SSE2__)
bool closestChild1 = (c1min < c0min);
#else
- union { __m128 m128; float v[4]; } uminmax;
- uminmax.m128 = tminmax;
- bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+ bool closestChild1 = tminmax[1] < tminmax[0];
#endif
if(closestChild1) {
@@ -243,11 +243,11 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -279,11 +279,11 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif