Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/bvh')
-rw-r--r--intern/cycles/kernel/bvh/bvh.h98
-rw-r--r--intern/cycles/kernel/bvh/bvh_local.h63
-rw-r--r--intern/cycles/kernel/bvh/bvh_nodes.h145
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h146
-rw-r--r--intern/cycles/kernel/bvh/bvh_traversal.h154
-rw-r--r--intern/cycles/kernel/bvh/bvh_types.h7
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume.h108
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume_all.h136
-rw-r--r--intern/cycles/kernel/bvh/obvh_local.h398
-rw-r--r--intern/cycles/kernel/bvh/obvh_nodes.h410
-rw-r--r--intern/cycles/kernel/bvh/obvh_shadow_all.h664
-rw-r--r--intern/cycles/kernel/bvh/obvh_traversal.h557
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume.h480
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume_all.h551
-rw-r--r--intern/cycles/kernel/bvh/qbvh_local.h291
-rw-r--r--intern/cycles/kernel/bvh/qbvh_nodes.h329
-rw-r--r--intern/cycles/kernel/bvh/qbvh_shadow_all.h453
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h420
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume.h367
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume_all.h444
20 files changed, 87 insertions, 6134 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 9b9df883b62..80b58f46329 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -35,14 +35,6 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_OPTIX__
-/* Common QBVH functions. */
-# ifdef __QBVH__
-# include "kernel/bvh/qbvh_nodes.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_nodes.h"
-# endif
-# endif
-
/* Regular BVH traversal */
# include "kernel/bvh/bvh_nodes.h"
@@ -51,27 +43,21 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_traversal.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "kernel/bvh/bvh_traversal.h"
-# endif
-
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_traversal.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
# endif
@@ -96,15 +82,9 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
-# endif
-
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
# endif /* __VOLUME__ */
@@ -116,27 +96,21 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_shadow_all.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# endif /* __SHADOW_RECORD_ALL__ */
@@ -148,15 +122,9 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-# endif
-
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
# endif /* __VOLUME_RECORD_ALL__ */
@@ -264,21 +232,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
# endif /* __HAIR__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_instancing(kg, ray, isect, visibility);
- }
-# endif /* __INSTANCING__ */
- return bvh_intersect(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-# else
return bvh_intersect(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-#endif /* __KERNEL_OPTIX__ */
+#endif /* __KERNEL_OPTIX__ */
}
#ifdef __BVH_LOCAL__
@@ -476,21 +431,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
}
# endif /* __HAIR__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
-# else
-# ifdef __INSTANCING__
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
-# else
- return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-# endif /* __KERNEL_OPTIX__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -548,21 +490,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
}
# endif /* __OBJECT_MOTION__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# else
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-# endif /* __KERNEL_OPTIX__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __VOLUME__ */
@@ -599,11 +528,6 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
}
# endif /* __OBJECT_MOTION__ */
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
#endif /* __VOLUME_RECORD_ALL__ */
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 7a069ef1108..4006c9c1632 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_local.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_local.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -88,26 +81,6 @@ ccl_device_inline
object = local_object;
}
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -117,33 +90,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -247,20 +203,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index db598d1c7fa..5367bdb633c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
return space;
}
-#if !defined(__KERNEL_SSE2__)
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
@@ -39,9 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -68,13 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-# else
+#else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-# endif
+#endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
@@ -113,21 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
int mask = 0;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
-# endif
+#endif
{
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
-# endif
+#endif
{
mask |= 2;
}
@@ -152,125 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
-
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(
- kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect(
- kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
- }
-}
-#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 268bb149970..dccd257d2de 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_shadow_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_shadow_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
*num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
@@ -207,31 +158,13 @@ ccl_device_inline
}
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
+ hit = curve_intersect(
+ kg, isect_array, P, dir, visibility, object, prim_addr, ray->time, curve_type);
break;
}
#endif
@@ -276,9 +209,7 @@ ccl_device_inline
/* move on to next entry in intersections array */
isect_array++;
(*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
}
@@ -286,32 +217,19 @@ ccl_device_inline
prim_addr++;
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -319,10 +237,8 @@ ccl_device_inline
node_addr = kernel_tex_fetch(__object_node, object);
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
@@ -330,11 +246,11 @@ ccl_device_inline
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* scale isect->t to adjust for instancing */
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -342,33 +258,20 @@ ccl_device_inline
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
@@ -381,20 +284,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 18afc6ae4eb..8b2699ab807 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_traversal.h"
-#endif
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_traversal.h"
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_MOTION: motion blur rendering
*/
@@ -77,26 +69,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_INIT();
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -106,37 +78,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
{
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
}
-#else // __KERNEL_SSE2__
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -173,9 +126,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -191,17 +142,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
/* shadow ray early termination */
-#if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-#else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-#endif
}
}
break;
@@ -214,51 +156,28 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
-# endif
}
}
break;
}
#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
+ const bool hit = curve_intersect(
+ kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-# endif
}
}
break;
@@ -266,30 +185,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#endif /* BVH_FEATURE(BVH_HAIR) */
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -300,38 +205,22 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INSTANCE();
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -342,20 +231,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif /* __QBVH__ */
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 84dc0dbaef5..b173568266b 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -31,13 +31,10 @@ CCL_NAMESPACE_BEGIN
/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
#define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define BVH_OSTACK_SIZE 768
/* BVH intersection function variations */
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
+#define BVH_MOTION 1
+#define BVH_HAIR 2
#define BVH_NAME_JOIN(x, y) x##_##y
#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index c83b0d783f4..1f2ea47269b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -79,26 +71,6 @@ ccl_device_inline
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -108,33 +80,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -170,9 +125,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -222,31 +175,17 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -262,38 +201,22 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_MOTION) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -304,20 +227,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index ae8c4d12e8a..a8664cc4331 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
uint num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
bool hit;
@@ -204,25 +155,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+#else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+#endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -248,25 +195,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-# endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+# else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+# endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -279,35 +222,21 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -322,55 +251,39 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* Scale isect->t to adjust for instancing. */
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
@@ -382,20 +295,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
- }
- kernel_assert(!"Should not happen");
- return 0;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
deleted file mode 100644
index e6bb548bc5b..00000000000
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
deleted file mode 100644
index e5c935b75ed..00000000000
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection AVX code is adopted from Embree,
- */
-
-struct OBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
-{
- OBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- obvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- obvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- obvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5)
-{
- obvh_stack_sort(s1, s2, s3, s4);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7,
- OBVHStackItem *ccl_restrict s8)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
- if (s8->dist < s7->dist) {
- obvh_item_swap(s7, s8);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#else
- const avx3f &org,
-#endif
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr + 2;
-#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
- const avxf tnear_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
- const avxf tnear_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
- const avxf tfar_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
- const avxf tfar_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
- const avxf tfar_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
-
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
-#else
- return 0;
-#endif
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
-
- const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return obvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
deleted file mode 100644
index b7ab75b723c..00000000000
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ /dev/null
@@ -1,664 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const int skip_object,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- //#if !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- if (p_type == PRIMITIVE_TRIANGLE) {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
- p_type);
- int hit = triangle_intersect(
- kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } // while
- }
- else {
- kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
- p_type);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int *nhiptr = &num_hits_in_instance;
-#else
- int nhi = 0;
- int *nhiptr = &nhi;
-#endif
-
- int result = triangle_intersect8(kg,
- &isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- prim_count,
- num_hits,
- max_hits,
- nhiptr,
- isect_t);
- if (result == 2) {
- return true;
- }
- } // prim_count
- } // PRIMITIVE_TRIANGLE
- else {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-
-#ifdef __SHADOW_TRICKS__
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- if (tri_object == skip_object) {
- ++prim_addr;
- continue;
- }
-#endif
-
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch (p_type) {
-
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } // while prim
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
deleted file mode 100644
index 9095233f8b6..00000000000
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- avxf dist;
-
- BVH_DEBUG_NEXT_NODE();
-
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- avxf cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } // for
- }
- else {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect8(kg,
- &isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- prim_count,
- 0,
- 0,
- NULL,
- 0.0f)) {
- tfar = avxf(isect->t);
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } // prim count
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- if (hit) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
deleted file mode 100644
index fb41ae783ab..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
deleted file mode 100644
index 56e2afd4a11..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
deleted file mode 100644
index b21f79bd3a0..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for finding local intersections
- * around the shading point, for subsurface scattering and bevel. We disable
- * various features for performance, and for instanced objects avoid traversing
- * other parts of the scene.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - SSE for hair.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
deleted file mode 100644
index 070406fb18a..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection SSE code is adopted from Embree,
- */
-
-struct QBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-/* TOOD(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
-{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3,
- QBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- qbvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- qbvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- qbvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-// ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#else
- const sse3f &org,
-#endif
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
- const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
- const sseb vmask = cast(tnear) > cast(tfar);
- int mask = (int)movemask(vmask) ^ 0xf;
-#else
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
-#endif
- *dist = tnear;
- return mask;
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
-
- const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#endif
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
deleted file mode 100644
index 682251bf25b..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint visibility,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & visibility) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
deleted file mode 100644
index f43e84bf368..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- ssef dist;
-
- BVH_DEBUG_NEXT_NODE();
-
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- float4 cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- if (hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
deleted file mode 100644
index e4eaed04467..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
deleted file mode 100644
index eddc48c487e..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT