Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/bvh/bvh_nodes.h')
-rw-r--r--intern/cycles/kernel/bvh/bvh_nodes.h145
1 files changed, 11 insertions, 134 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index db598d1c7fa..5367bdb633c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
return space;
}
-#if !defined(__KERNEL_SSE2__)
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
@@ -39,9 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -68,13 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-# else
+#else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-# endif
+#endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
@@ -113,21 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
int mask = 0;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
-# endif
+#endif
{
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
-# endif
+#endif
{
mask |= 2;
}
@@ -152,125 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
-
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(
- kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect(
- kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
- }
-}
-#endif /* !defined(__KERNEL_SSE2__) */