diff options
Diffstat (limited to 'intern/cycles/kernel/bvh/bvh_nodes.h')
-rw-r--r-- | intern/cycles/kernel/bvh/bvh_nodes.h | 145 |
1 files changed, 11 insertions, 134 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h index db598d1c7fa..5367bdb633c 100644 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k return space; } -#if !defined(__KERNEL_SSE2__) ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, const float3 P, const float3 idir, @@ -39,9 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, { /* fetch node data */ -# ifdef __VISIBILITY_FLAG__ +#ifdef __VISIBILITY_FLAG__ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); -# endif +#endif float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); @@ -68,13 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, dist[0] = c0min; dist[1] = c1min; -# ifdef __VISIBILITY_FLAG__ +#ifdef __VISIBILITY_FLAG__ /* this visibility test gives a 5% performance hit, how to solve? */ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); -# else +#else return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0); -# endif +#endif } ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg, @@ -113,21 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, float dist[2]) { int mask = 0; -# ifdef __VISIBILITY_FLAG__ +#ifdef __VISIBILITY_FLAG__ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); -# endif +#endif if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { -# ifdef __VISIBILITY_FLAG__ +#ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.x) & visibility)) -# endif +#endif { mask |= 1; } } if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { -# ifdef __VISIBILITY_FLAG__ +#ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.y) & visibility)) -# endif +#endif { mask |= 2; } @@ -152,125 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); } } - -#else /* !defined(__KERNEL_SSE2__) */ - -int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg, - const float3 &P, - const float3 &dir, - const ssef &tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const int node_addr, - const uint visibility, - float dist[2]) -{ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - - /* fetch node data */ - const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; - - int mask = movemask(lrhit); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); - return cmask; -# else - return mask & 3; -# endif -} - -ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, - const float3 P, - const float3 dir, - const ssef &isect_near, - const ssef &isect_far, - const int node_addr, - const uint visibility, - float dist[2]) -{ - Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir); - float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f), - lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f), - lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f); - - ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(lower_x, upper_x); - ssef tnear_y = min(lower_y, upper_y); - ssef tnear_z = min(lower_z, upper_z); - ssef tfar_x = max(lower_x, upper_x); - ssef tfar_y = max(lower_y, upper_y); - ssef tfar_z = max(lower_z, upper_z); - - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - sseb vmask = tnear <= tfar; - dist[0] = tnear.f[0]; - dist[1] = tnear.f[1]; - - int mask = (int)movemask(vmask); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); - return cmask; -# else - return mask & 3; -# endif -} - -ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, - const float3 &P, - const float3 &dir, - const ssef &isect_near, - const ssef &isect_far, - const ssef &tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const int node_addr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect( - kg, P, dir, isect_near, isect_far, node_addr, visibility, dist); - } - else { - return bvh_aligned_node_intersect( - kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist); - } -} -#endif /* !defined(__KERNEL_SSE2__) */ |