Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/bvh/bvh_nodes.h')
-rw-r--r--intern/cycles/kernel/bvh/bvh_nodes.h899
1 files changed, 413 insertions, 486 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 060b3934a41..042630121c8 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -20,12 +20,12 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
int node_addr,
int child)
{
- Transform space;
- const int child_addr = node_addr + child * 3;
- space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
- space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
- space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
- return space;
+ Transform space;
+ const int child_addr = node_addr + child * 3;
+ space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1);
+ space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2);
+ space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3);
+ return space;
}
#if !defined(__KERNEL_SSE2__)
@@ -38,42 +38,41 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+ float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+ float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+# else
+ return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+# endif
}
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
@@ -87,118 +86,115 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
float dist[2])
{
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
- if(difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+ float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+ float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+ if (difl != 0.0f) {
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+# else
+ return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+# endif
}
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- int node_addr,
- int child,
- float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ int node_addr,
+ int child,
+ float dist[2])
{
- Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 lower_xyz = aligned_P * nrdir;
- float3 upper_xyz = lower_xyz - nrdir;
- const float near_x = min(lower_xyz.x, upper_xyz.x);
- const float near_y = min(lower_xyz.y, upper_xyz.y);
- const float near_z = min(lower_xyz.z, upper_xyz.z);
- const float far_x = max(lower_xyz.x, upper_xyz.x);
- const float far_y = max(lower_xyz.y, upper_xyz.y);
- const float far_z = max(lower_xyz.z, upper_xyz.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
- *dist = tnear;
- return tnear <= tfar;
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 lower_xyz = aligned_P * nrdir;
+ float3 upper_xyz = lower_xyz - nrdir;
+ const float near_x = min(lower_xyz.x, upper_xyz.x);
+ const float near_y = min(lower_xyz.y, upper_xyz.y);
+ const float near_z = min(lower_xyz.z, upper_xyz.z);
+ const float far_x = max(lower_xyz.x, upper_xyz.x);
+ const float far_y = max(lower_xyz.y, upper_xyz.y);
+ const float far_z = max(lower_xyz.z, upper_xyz.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ return tnear <= tfar;
}
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- const float difl,
- int node_addr,
- int child,
- float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ const float difl,
+ int node_addr,
+ int child,
+ float dist[2])
{
- Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 tLowerXYZ = aligned_P * nrdir;
- float3 tUpperXYZ = tLowerXYZ - nrdir;
- const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
- const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
- const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
- const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
- const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
- const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
- *dist = tnear;
- if(difl != 0.0f) {
- /* TODO(sergey): Same as for QBVH, needs a proper use. */
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- return round_down*tnear <= round_up*tfar;
- }
- else {
- return tnear <= tfar;
- }
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 tLowerXYZ = aligned_P * nrdir;
+ float3 tUpperXYZ = tLowerXYZ - nrdir;
+ const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
+ const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
+ const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
+ const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
+ const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
+ const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ if (difl != 0.0f) {
+ /* TODO(sergey): Same as for QBVH, needs a proper use. */
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ return round_down * tnear <= round_up * tfar;
+ }
+ else {
+ return tnear <= tfar;
+ }
}
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
@@ -210,25 +206,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.x) & visibility))
+# endif
+ {
+ mask |= 1;
+ }
+ }
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.y) & visibility))
+# endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
}
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
@@ -242,25 +238,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
const uint visibility,
float dist[2])
{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.x) & visibility))
+# endif
+ {
+ mask |= 1;
+ }
+ }
+ if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.y) & visibility))
+# endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
}
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
@@ -272,26 +268,13 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- idir,
- t,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- idir,
- t,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
+ }
}
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
@@ -305,279 +288,244 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- idir,
- t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- idir,
- t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(
+ kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(
+ kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
+ }
}
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
+#else /* !defined(__KERNEL_SSE2__) */
+
+int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
- int mask = movemask(lrhit);
+ int mask = movemask(lrhit);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
-ccl_device_forceinline int bvh_aligned_node_intersect_robust(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
+ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const float difl,
+ const float extmax,
+ const int nodeAddr,
+ const uint visibility,
+ float dist[2])
{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- if(difl != 0.0f) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 *tminmaxview = (float4*)&tminmax;
- float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
- float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr;
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+
+ if (difl != 0.0f) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+ float4 *tminmaxview = (float4 *)&tminmax;
+ float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
+ float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
+
+ int mask = movemask(lrhit);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
- const ssef& isect_near,
- const ssef& isect_far,
+ const ssef &isect_near,
+ const ssef &isect_far,
const int node_addr,
const uint visibility,
float dist[2])
{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ sseb vmask = tnear <= tfar;
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
- const ssef& isect_near,
- const ssef& isect_far,
+ const ssef &isect_near,
+ const ssef &isect_far,
const float difl,
const int node_addr,
const uint visibility,
float dist[2])
{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask;
- if(difl != 0.0f) {
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- vmask = round_down*tnear <= round_up*tfar;
- }
- else {
- vmask = tnear <= tfar;
- }
-
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ sseb vmask;
+ if (difl != 0.0f) {
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ vmask = round_down * tnear <= round_up * tfar;
+ }
+ else {
+ vmask = tnear <= tfar;
+ }
+
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& isect_near,
- const ssef& isect_far,
- const ssef& tsplat,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &isect_near,
+ const ssef &isect_far,
+ const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -585,37 +533,23 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- isect_near,
- isect_far,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(
+ kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(
+ kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
+ }
}
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& isect_near,
- const ssef& isect_far,
- const ssef& tsplat,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &isect_near,
+ const ssef &isect_far,
+ const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -625,31 +559,24 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- isect_near,
- isect_far,
- difl,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(
+ kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(kg,
+ P,
+ dir,
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
}
-#endif /* !defined(__KERNEL_SSE2__) */
+#endif /* !defined(__KERNEL_SSE2__) */