diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_nodes.h | 30 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_traversal.h | 65 |
2 files changed, 39 insertions, 56 deletions
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h index 2ee2a393e80..6d22f0b0d6a 100644 --- a/intern/cycles/kernel/bvh/qbvh_nodes.h +++ b/intern/cycles/kernel/bvh/qbvh_nodes.h @@ -21,6 +21,36 @@ struct QBVHStackItem { float dist; }; +ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir, + int *ccl_restrict near_x, + int *ccl_restrict near_y, + int *ccl_restrict near_z, + int *ccl_restrict far_x, + int *ccl_restrict far_y, + int *ccl_restrict far_z) + +{ +#ifdef __KERNEL_SSE__ + *near_x = 0; *far_x = 1; + *near_y = 2; *far_y = 3; + *near_z = 4; *far_z = 5; + + const size_t mask = movemask(ssef(idir.m128)); + + const int mask_x = mask & 1; + const int mask_y = (mask & 2) >> 1; + const int mask_z = (mask & 4) >> 2; + + *near_x += mask_x; *far_x -= mask_x; + *near_y += mask_y; *far_y -= mask_y; + *near_z += mask_z; *far_z -= mask_z; +#else + if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; } + if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; } + if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; } +#endif +} + /* TOOD(sergey): Investigate if using intrinsics helps for both * stack item swap and float comparison. */ diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h index b9da539306b..f2d8e558dcc 100644 --- a/intern/cycles/kernel/bvh/qbvh_traversal.h +++ b/intern/cycles/kernel/bvh/qbvh_traversal.h @@ -100,27 +100,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, #endif /* Offsets to select the side that becomes the lower or upper bound. */ -#ifdef __KERNEL_SSE__ - int near_x = 0, near_y = 2, near_z = 4; - int far_x = 1, far_y = 3, far_z = 5; - - const size_t mask = movemask(ssef(idir.m128)); - - const int mask_x = mask & 1; - const int mask_y = (mask & 2) >> 1; - const int mask_z = (mask & 4) >> 2; - - near_x += mask_x; far_x -= mask_x; - near_y += mask_y; far_y -= mask_y; - near_z += mask_z; far_z -= mask_z; -#else int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } -#endif + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -442,24 +426,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist); # endif -#ifdef __KERNEL_SSE__ - near_x = 0; near_y = 2; near_z = 4; - far_x = 1; far_y = 3; far_z = 5; - - const size_t mask = movemask(ssef(idir.m128)); - - const int mask_x = mask & 1; - const int mask_y = (mask & 2) >> 1; - const int mask_z = (mask & 4) >> 2; - - near_x += mask_x; far_x -= mask_x; - near_y += mask_y; far_y -= mask_y; - near_z += mask_z; far_z -= mask_z; -#else - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } -#endif + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); @@ -499,25 +468,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); # endif -#ifdef __KERNEL_SSE__ - near_x = 0; near_y = 2; near_z = 4; - far_x = 1; far_y = 3; far_z = 5; - - const size_t mask = movemask(ssef(idir.m128)); - - const int mask_x = mask & 1; - const int mask_y = (mask & 2) >> 1; - const int mask_z = (mask & 4) >> 2; - - near_x += mask_x; far_x -= mask_x; - near_y += mask_y; far_y -= mask_y; - near_z += mask_z; far_z -= mask_z; -#else - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } -#endif - + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); |