Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-07-11 13:28:45 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-07-11 14:58:47 +0300
commit4355603790712032e89fa4da6d8ce7f3ede62b4f (patch)
tree146102d7c52475a2c334e7b2a5a0e5b61a3b8153 /intern/cycles/kernel/geom
parentc58ae20f6ce2af2a2b71d477917d2a272e47260f (diff)
Cycles: Move BVK kernel files to own directory
BVH traversal is not really that much a geometry and we've got quite some traversals now. Makes sense to keep them separate in the name of source structure clarity.
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r--intern/cycles/kernel/geom/geom.h9
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h417
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_nodes.h656
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h386
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h266
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h428
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h324
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume_all.h397
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh.h433
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_shadow.h449
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h299
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_traversal.h465
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume.h374
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume_all.h446
14 files changed, 0 insertions, 5349 deletions
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 33e91d1ee44..d2c7edb11ea 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -15,14 +15,6 @@
* limitations under the License.
*/
-/* bottom-most stack entry, indicating the end of traversal */
-#define ENTRYPOINT_SENTINEL 0x76543210
-
-/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
-#define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define TRI_NODE_SIZE 3
-
#include "geom_attribute.h"
#include "geom_object.h"
#include "geom_triangle.h"
@@ -32,5 +24,4 @@
#include "geom_curve.h"
#include "geom_volume.h"
#include "geom_primitive.h"
-#include "geom_bvh.h"
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
deleted file mode 100644
index f8d563f0afa..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* BVH
- *
- * Bounding volume hierarchy for ray tracing. We compile different variations
- * of the same BVH traversal function for faster rendering when some types of
- * primitives are not needed, using #includes to work around the lack of
- * C++ templates in OpenCL.
- *
- * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
- * the code has been extended and modified to support more primitives and work
- * with CPU/CUDA/OpenCL. */
-
-CCL_NAMESPACE_BEGIN
-
-/* Don't inline intersect functions on GPU, this is faster */
-#ifdef __KERNEL_GPU__
-# define ccl_device_intersect ccl_device_noinline
-#else
-# define ccl_device_intersect ccl_device_inline
-#endif
-
-/* BVH intersection function variations */
-
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
-#define BVH_HAIR_MINIMUM_WIDTH 8
-
-#define BVH_NAME_JOIN(x,y) x ## _ ## y
-#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y)
-#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME)
-
-#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
-
-/* Debugging heleprs */
-#ifdef __KERNEL_DEBUG__
-# define BVH_DEBUG_INIT() \
- do { \
- isect->num_traversal_steps = 0; \
- isect->num_traversed_instances = 0; \
- } while(0)
-# define BVH_DEBUG_NEXT_STEP() \
- do { \
- ++isect->num_traversal_steps; \
- } while(0)
-# define BVH_DEBUG_NEXT_INSTANCE() \
- do { \
- ++isect->num_traversed_instances; \
- } while(0)
-#else /* __KERNEL_DEBUG__ */
-# define BVH_DEBUG_INIT()
-# define BVH_DEBUG_NEXT_STEP()
-# define BVH_DEBUG_NEXT_INSTANCE()
-#endif /* __KERNEL_DEBUG__ */
-
-
-/* Common QBVH functions. */
-#ifdef __QBVH__
-# include "geom_qbvh.h"
-#endif
-
-/* Regular BVH traversal */
-
-#include "geom_bvh_nodes.h"
-
-#define BVH_FUNCTION_NAME bvh_intersect
-#define BVH_FUNCTION_FEATURES 0
-#include "geom_bvh_traversal.h"
-
-#if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_traversal.h"
-#endif
-
-#if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-# include "geom_bvh_traversal.h"
-#endif
-
-#if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_traversal.h"
-#endif
-
-#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-# include "geom_bvh_traversal.h"
-#endif
-
-/* Subsurface scattering BVH traversal */
-
-#if defined(__SUBSURFACE__)
-# define BVH_FUNCTION_NAME bvh_intersect_subsurface
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "geom_bvh_subsurface.h"
-#endif
-
-#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
-# include "geom_bvh_subsurface.h"
-#endif
-
-/* Volume BVH traversal */
-
-#if defined(__VOLUME__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "geom_bvh_volume.h"
-#endif
-
-#if defined(__VOLUME__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "geom_bvh_volume.h"
-#endif
-
-#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
-# include "geom_bvh_volume.h"
-#endif
-
-/* Record all intersections - Shadow BVH traversal */
-
-#if defined(__SHADOW_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
-# define BVH_FUNCTION_FEATURES 0
-# include "geom_bvh_shadow.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_shadow.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "geom_bvh_shadow.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_shadow.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
-# include "geom_bvh_shadow.h"
-#endif
-
-/* Record all intersections - Volume BVH traversal */
-
-#if defined(__VOLUME_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "geom_bvh_volume_all.h"
-#endif
-
-#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "geom_bvh_volume_all.h"
-#endif
-
-#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
-# include "geom_bvh_volume_all.h"
-#endif
-
-#undef BVH_FEATURE
-#undef BVH_NAME_JOIN
-#undef BVH_NAME_EVAL
-#undef BVH_FUNCTION_FULL_NAME
-
-ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
- const Ray *ray,
- const uint visibility,
- Intersection *isect,
- uint *lcg_state,
- float difl,
- float extmax)
-{
-#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
-# endif /* __HAIR__ */
-
- return bvh_intersect_motion(kg, ray, isect, visibility);
- }
-#endif /* __OBJECT_MOTION__ */
-
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
-
-#ifdef __KERNEL_CPU__
-
-# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-
- return bvh_intersect(kg, ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
-
-# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-# else
- return bvh_intersect(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-
-#endif /* __KERNEL_CPU__ */
-}
-
-#ifdef __SUBSURFACE__
-ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
- const Ray *ray,
- SubsurfaceIntersection *ss_isect,
- int subsurface_object,
- uint *lcg_state,
- int max_hits)
-{
-#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_subsurface_motion(kg,
- ray,
- ss_isect,
- subsurface_object,
- lcg_state,
- max_hits);
- }
-#endif /* __OBJECT_MOTION__ */
- return bvh_intersect_subsurface(kg,
- ray,
- ss_isect,
- subsurface_object,
- lcg_state,
- max_hits);
-}
-#endif
-
-#ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
-{
-# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits);
-# endif /* __HAIR__ */
-
- return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits);
- }
-# endif /* __OBJECT_MOTION__ */
-
-# ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
-# endif /* __HAIR__ */
-
-# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
-# endif /* __INSTANCING__ */
-
- return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
-}
-#endif /* __SHADOW_RECORD_ALL__ */
-
-#ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
-# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_motion(kg, ray, isect, visibility);
- }
-# endif /* __OBJECT_MOTION__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# else
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-}
-#endif /* __VOLUME__ */
-
-#ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint max_hits,
- const uint visibility)
-{
-# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
- }
-# endif /* __OBJECT_MOTION__ */
-# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
-# endif /* __INSTANCING__ */
- return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
-}
-#endif /* __VOLUME_RECORD_ALL__ */
-
-
-/* Ray offset to avoid self intersection.
- *
- * This function should be used to compute a modified ray start position for
- * rays leaving from a surface. */
-
-ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
-{
-#ifdef __INTERSECTION_REFINE__
- const float epsilon_f = 1e-5f;
- /* ideally this should match epsilon_f, but instancing and motion blur
- * precision makes it problematic */
- const float epsilon_test = 1.0f;
- const int epsilon_i = 32;
-
- float3 res;
-
- /* x component */
- if(fabsf(P.x) < epsilon_test) {
- res.x = P.x + Ng.x*epsilon_f;
- }
- else {
- uint ix = __float_as_uint(P.x);
- ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
- res.x = __uint_as_float(ix);
- }
-
- /* y component */
- if(fabsf(P.y) < epsilon_test) {
- res.y = P.y + Ng.y*epsilon_f;
- }
- else {
- uint iy = __float_as_uint(P.y);
- iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
- res.y = __uint_as_float(iy);
- }
-
- /* z component */
- if(fabsf(P.z) < epsilon_test) {
- res.z = P.z + Ng.z*epsilon_f;
- }
- else {
- uint iz = __float_as_uint(P.z);
- iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
- res.z = __uint_as_float(iz);
- }
-
- return res;
-#else
- const float epsilon_f = 1e-4f;
- return P + epsilon_f*Ng;
-#endif
-}
-
-#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
-/* ToDo: Move to another file? */
-ccl_device int intersections_compare(const void *a, const void *b)
-{
- const Intersection *isect_a = (const Intersection*)a;
- const Intersection *isect_b = (const Intersection*)b;
-
- if(isect_a->t < isect_b->t)
- return -1;
- else if(isect_a->t > isect_b->t)
- return 1;
- else
- return 0;
-}
-#endif
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/geom/geom_bvh_nodes.h b/intern/cycles/kernel/geom/geom_bvh_nodes.h
deleted file mode 100644
index 5b0d8785d0e..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_nodes.h
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * Copyright 2011-2016, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
-// 3-vector which might be faster.
-ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
- int nodeAddr,
- int child)
-{
- Transform space;
- const int child_addr = nodeAddr + child * 3;
- space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
- space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
- space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
- space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
- return space;
-}
-
-#if !defined(__KERNEL_SSE2__)
-ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 idir,
- const float t,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
-
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
-}
-
-ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
-
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- if(difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
-}
-
-ccl_device_inline bool bvh_unaligned_node_intersect_child(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- int nodeAddr,
- int child,
- float dist[2])
-{
- Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 tLowerXYZ = aligned_P * nrdir;
- float3 tUpperXYZ = tLowerXYZ - nrdir;
- const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x);
- const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y);
- const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z);
- const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x);
- const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y);
- const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z);
- const float tNear = max4(0.0f, tNearX, tNearY, tNearZ);
- const float tFar = min4(t, tFarX, tFarY, tFarZ);
- *dist = tNear;
- return tNear <= tFar;
-}
-
-ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- const float difl,
- int nodeAddr,
- int child,
- float dist[2])
-{
- Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 tLowerXYZ = aligned_P * nrdir;
- float3 tUpperXYZ = tLowerXYZ - nrdir;
- const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x);
- const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y);
- const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z);
- const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x);
- const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y);
- const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z);
- const float tNear = max4(0.0f, tNearX, tNearY, tNearZ);
- const float tFar = min4(t, tFarX, tFarY, tFarZ);
- *dist = tNear;
- if(difl != 0.0f) {
- /* TODO(sergey): Same as for QBVH, needs a proper use. */
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- return round_down*tNear <= round_up*tFar;
- }
- else {
- return tNear <= tFar;
- }
-}
-
-ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
-}
-
-ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
-}
-
-ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- idir,
- t,
- nodeAddr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- idir,
- t,
- nodeAddr,
- visibility,
- dist);
- }
-}
-
-ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- idir,
- t,
- difl,
- extmax,
- nodeAddr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- idir,
- t,
- difl,
- extmax,
- nodeAddr,
- visibility,
- dist);
- }
-}
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_inline bvh_aligned_node_intersect(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-int ccl_device_inline bvh_aligned_node_intersect_robust(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- if(difl != 0.0f) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 *tminmaxview = (float4*)&tminmax;
- float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
- float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef& tnear,
- const ssef& tfar,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);;
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef tLowerX = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- tLowerY = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- tLowerZ = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- tUpperY = tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(tLowerX, tUpperX);
- ssef tnear_y = min(tLowerY, tUpperY);
- ssef tnear_z = min(tLowerZ, tUpperZ);
- ssef tfar_x = max(tLowerX, tUpperX);
- ssef tfar_y = max(tLowerY, tUpperY);
- ssef tfar_z = max(tLowerZ, tUpperZ);
-
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- sseb vmask = tNear <= tFar;
- dist[0] = tNear.f[0];
- dist[1] = tNear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef& tnear,
- const ssef& tfar,
- const float difl,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);;
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef tLowerX = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- tLowerY = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- tLowerZ = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- tUpperY = tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(tLowerX, tUpperX);
- ssef tnear_y = min(tLowerY, tUpperY);
- ssef tnear_z = min(tLowerZ, tUpperZ);
- ssef tfar_x = max(tLowerX, tUpperX);
- ssef tfar_y = max(tLowerY, tUpperY);
- ssef tfar_z = max(tLowerZ, tUpperZ);
-
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- sseb vmask;
- if(difl != 0.0f) {
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- vmask = round_down*tNear <= round_up*tFar;
- }
- else {
- vmask = tNear <= tFar;
- }
-
- dist[0] = tNear.f[0];
- dist[1] = tNear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tnear,
- const ssef& tfar,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- tnear,
- tfar,
- nodeAddr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- visibility,
- dist);
- }
-}
-
-ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tnear,
- const ssef& tfar,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- tnear,
- tfar,
- difl,
- nodeAddr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- nodeAddr,
- visibility,
- dist);
- }
-}
-#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
deleted file mode 100644
index a54c6024152..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2013, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef __QBVH__
-# include "geom_qbvh_shadow.h"
-#endif
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT bvh_node_intersect
-#else
-# define NODE_INTERSECT bvh_aligned_node_intersect
-#endif
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- uint *num_hits)
-{
- /* todo:
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- int nodeAddrChild1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect_t,
- nodeAddr,
- PATH_RAY_SHADOW,
- dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- PATH_RAY_SHADOW,
- dist);
-#endif // __KERNEL_SSE2__
-
- nodeAddr = __float_as_int(cnodes.z);
- nodeAddrChild1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool closestChild1 = (dist[1] < dist[0]);
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
- }
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- nodeAddr = nodeAddrChild1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- const int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
-
- /* primitive intersection */
- while(primAddr < primAddr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
-
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- else
- hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* shadow ray early termination */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- primAddr++;
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- if(num_hits_in_instance) {
- float t_fac;
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* scale isect->t to adjust for instancing */
- for(int i = 0; i < num_hits_in_instance; i++)
- (isect_array-i-1)->t *= t_fac;
- }
- else {
- float ignore_t = FLT_MAX;
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- uint *num_hits)
-{
-#ifdef __QBVH__
- if(kernel_data.bvh.use_qbvh) {
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect_array,
- max_hits,
- num_hits);
- }
- else
-#endif
- {
- kernel_assert(kernel_data.bvh.use_qbvh == false);
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- max_hits,
- num_hits);
- }
-}
-
-#undef BVH_FUNCTION_NAME
-#undef BVH_FUNCTION_FEATURES
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
deleted file mode 100644
index 88aaf01d682..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2013, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef __QBVH__
-# include "geom_qbvh_subsurface.h"
-#endif
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT bvh_node_intersect
-#else
-# define NODE_INTERSECT bvh_aligned_node_intersect
-#endif
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- SubsurfaceIntersection *ss_isect,
- int subsurface_object,
- uint *lcg_state,
- int max_hits)
-{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
-
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- ss_isect->num_hits = 0;
-
- const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
- if(!(object_flag & SD_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- bvh_instance_motion_push(kg,
- subsurface_object,
- ray,
- &P,
- &dir,
- &idir,
- &isect_t,
- &ob_itfm);
-#else
- bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
-#endif
- object = subsurface_object;
- }
-
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- int nodeAddrChild1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect_t,
- nodeAddr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
-#endif // __KERNEL_SSE2__
-
- nodeAddr = __float_as_int(cnodes.z);
- nodeAddrChild1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool closestChild1 = (dist[1] < dist[0]);
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
- }
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- nodeAddr = nodeAddrChild1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
- const int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- triangle_intersect_subsurface(kg,
- &isect_precalc,
- ss_isect,
- P,
- object,
- primAddr,
- isect_t,
- lcg_state,
- max_hits);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- motion_triangle_intersect_subsurface(kg,
- ss_isect,
- P,
- dir,
- ray->time,
- object,
- primAddr,
- isect_t,
- lcg_state,
- max_hits);
- }
- break;
- }
-#endif
- default: {
- break;
- }
- }
- }
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-}
-
-ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
- const Ray *ray,
- SubsurfaceIntersection *ss_isect,
- int subsurface_object,
- uint *lcg_state,
- int max_hits)
-{
-#ifdef __QBVH__
- if(kernel_data.bvh.use_qbvh) {
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- ss_isect,
- subsurface_object,
- lcg_state,
- max_hits);
- }
- else
-#endif
- {
- kernel_assert(kernel_data.bvh.use_qbvh == false);
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- ss_isect,
- subsurface_object,
- lcg_state,
- max_hits);
- }
-}
-
-#undef BVH_FUNCTION_NAME
-#undef BVH_FUNCTION_FEATURES
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
deleted file mode 100644
index f409dd5f403..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2013, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef __QBVH__
-# include "geom_qbvh_traversal.h"
-#endif
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT bvh_node_intersect
-# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
-#else
-# define NODE_INTERSECT bvh_aligned_node_intersect
-# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
-#endif
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , uint *lcg_state,
- float difl,
- float extmax
-#endif
- )
-{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- int nodeAddrChild1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#if !defined(__KERNEL_SSE2__)
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect->t,
- difl,
- extmax,
- nodeAddr,
- visibility,
- dist);
- }
- else
-# endif
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect->t,
- nodeAddr,
- visibility,
- dist);
- }
-#else // __KERNEL_SSE2__
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- nodeAddr,
- visibility,
- dist);
- }
- else
-# endif
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- visibility,
- dist);
- }
-#endif // __KERNEL_SSE2__
-
- nodeAddr = __float_as_int(cnodes.z);
- nodeAddrChild1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool closestChild1 = (dist[1] < dist[0]);
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
- }
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- nodeAddr = nodeAddrChild1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- BVH_DEBUG_NEXT_STEP();
- }
-
- /* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- const int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
- /* shadow ray early termination */
-#if defined(__KERNEL_SSE2__)
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-#else
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
-#endif
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
- /* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
-# endif
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- if(hit) {
- /* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
-# endif
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , uint *lcg_state,
- float difl,
- float extmax
-#endif
- )
-{
-#ifdef __QBVH__
- if(kernel_data.bvh.use_qbvh) {
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect,
- visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
-#endif
- );
- }
- else
-#endif
- {
- kernel_assert(kernel_data.bvh.use_qbvh == false);
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
-#endif
- );
- }
-}
-
-#undef BVH_FUNCTION_NAME
-#undef BVH_FUNCTION_FEATURES
-#undef NODE_INTERSECT
-#undef NODE_INTERSECT_ROBUST
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
deleted file mode 100644
index 5e70ce99f51..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef __QBVH__
-# include "geom_qbvh_volume.h"
-#endif
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT bvh_node_intersect
-#else
-# define NODE_INTERSECT bvh_aligned_node_intersect
-#endif
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- int nodeAddrChild1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect->t,
- nodeAddr,
- visibility,
- dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
-
- nodeAddr = __float_as_int(cnodes.z);
- nodeAddrChild1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool closestChild1 = (dist[1] < dist[0]);
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
- }
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- nodeAddr = nodeAddrChild1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- const int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
- }
- break;
- }
-#endif
- default: {
- break;
- }
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
-
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* pop */
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
-#endif /* FEATURE(BVH_MOTION) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
-#ifdef __QBVH__
- if(kernel_data.bvh.use_qbvh) {
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect,
- visibility);
- }
- else
-#endif
- {
- kernel_assert(kernel_data.bvh.use_qbvh == false);
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility);
- }
-}
-
-#undef BVH_FUNCTION_NAME
-#undef BVH_FUNCTION_FEATURES
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h
deleted file mode 100644
index ab5ac8505a3..00000000000
--- a/intern/cycles/kernel/geom/geom_bvh_volume_all.h
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef __QBVH__
-# include "geom_qbvh_volume_all.h"
-#endif
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT bvh_node_intersect
-#else
-# define NODE_INTERSECT bvh_aligned_node_intersect
-#endif
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- int nodeAddrChild1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect_t,
- nodeAddr,
- visibility,
- dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- nodeAddr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
-
- nodeAddr = __float_as_int(cnodes.z);
- nodeAddrChild1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool closestChild1 = (dist[1] < dist[0]);
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
- }
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- nodeAddr = nodeAddrChild1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- const int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- bool hit;
-
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif /* BVH_MOTION */
- default: {
- break;
- }
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
-
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* pop */
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- if(num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
- float ignore_t = FLT_MAX;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
-#endif /* FEATURE(BVH_MOTION) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
-#ifdef __QBVH__
- if(kernel_data.bvh.use_qbvh) {
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect_array,
- max_hits,
- visibility);
- }
- else
-#endif
- {
- kernel_assert(kernel_data.bvh.use_qbvh == false);
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- max_hits,
- visibility);
- }
-}
-
-#undef BVH_FUNCTION_NAME
-#undef BVH_FUNCTION_FEATURES
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
deleted file mode 100644
index 5eda3213acb..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-struct QBVHStackItem {
- int addr;
- float dist;
-};
-
-/* TOOD(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
- QBVHStackItem *__restrict b)
-{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3)
-{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3,
- QBVHStackItem *__restrict s4)
-{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
- if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
- if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
-}
-
-/* Axis-aligned nodes intersection */
-
-ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
-#else
- const sse3f& org,
-#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
- const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
- const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
- const sseb vmask = cast(tNear) > cast(tFar);
- int mask = (int)movemask(vmask)^0xf;
-#else
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- const sseb vmask = tNear <= tFar;
- int mask = (int)movemask(vmask);
-#endif
- *dist = tNear;
- return mask;
-}
-
-ccl_device_inline int qbvh_aligned_node_intersect_robust(
- KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
-#else
- const sse3f& P,
-#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- const float difl,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
-#endif
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- const sseb vmask = round_down*tNear <= round_up*tFar;
- *dist = tNear;
- return (int)movemask(vmask);
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int qbvh_unaligned_node_intersect(
- KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
-#endif
- const sse3f& org,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
-
- const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
- const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
- aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
- aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
- const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
- const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tNear <= tFar;
- *dist = tNear;
- return movemask(vmask);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
- const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
- const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tNear <= tFar;
- *dist = tNear;
- return movemask(vmask);
-#endif
-}
-
-ccl_device_inline int qbvh_unaligned_node_intersect_robust(
- KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
-#endif
- const sse3f& P,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- const float difl,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
-
- const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
- const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
- aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
- aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
-#endif
- const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
- const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
- const sseb vmask = round_down*tNear <= round_up*tFar;
- *dist = tNear;
- return movemask(vmask);
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int qbvh_node_intersect(
- KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
-#endif
- const sse3f& org,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- dist);
- }
-}
-
-ccl_device_inline int qbvh_node_intersect_robust(
- KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
-#endif
- const sse3f& P,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- const float difl,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect_robust(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#endif
- P,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- difl,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect_robust(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#else
- P,
-#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- difl,
- dist);
- }
-}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
deleted file mode 100644
index e5e611a0d47..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- *
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(tmax);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-#endif
-
- ssef dist;
- int traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-# endif
-# if BVH_FEATURE(BVH_HAIR)
- dir4,
-# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-#endif
-
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
-
- /* Primitive intersection. */
- while(primAddr < primAddr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
-
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- else
- hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- primAddr++;
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
-
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- if(num_hits_in_instance) {
- float t_fac;
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
-
- /* scale isect->t to adjust for instancing */
- for(int i = 0; i < num_hits_in_instance; i++)
- (isect_array-i-1)->t *= t_fac;
- }
- else {
- float ignore_t = FLT_MAX;
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(tmax);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
deleted file mode 100644
index 4adaf9c8f3d..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- *
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- SubsurfaceIntersection *ss_isect,
- int subsurface_object,
- uint *lcg_state,
- int max_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - SSE for hair.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- ss_isect->num_hits = 0;
-
- const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
- if(!(object_flag & SD_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- bvh_instance_motion_push(kg,
- subsurface_object,
- ray,
- &P,
- &dir,
- &idir,
- &isect_t,
- &ob_itfm);
-#else
- bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
-#endif
- object = subsurface_object;
- }
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return;
- }
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- ssef dist;
-
- int traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
-
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- triangle_intersect_subsurface(kg,
- &isect_precalc,
- ss_isect,
- P,
- object,
- primAddr,
- isect_t,
- lcg_state,
- max_hits);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- motion_triangle_intersect_subsurface(kg,
- ss_isect,
- P,
- dir,
- ray->time,
- object,
- primAddr,
- isect_t,
- lcg_state,
- max_hits);
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
deleted file mode 100644
index 24bf85f46c8..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ /dev/null
@@ -1,465 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_MOTION: motion blur rendering
- *
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,uint *lcg_state,
- float difl,
- float extmax
-#endif
- )
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
- traversalStack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
- float nodeDist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-
- if(UNLIKELY(nodeDist > isect->t)
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0)
-#endif
- {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- int traverseChild;
- ssef dist;
-
- BVH_DEBUG_NEXT_STEP();
-
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- /* NOTE: We extend all the child BB instead of fetching
- * and checking visibility flags for each of the,
- *
- * Need to test if doing opposite would be any faster.
- */
- traverseChild = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
-# ifdef __KERNEL_AVX2__
- P_idir4,
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-# endif
-# if BVH_FEATURE(BVH_HAIR)
- dir4,
-# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
- }
-
- if(traverseChild != 0) {
- float4 cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here),
- */
-#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- float d0 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- nodeDist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- nodeDist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- nodeDist = d0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
-
-#ifdef __VISIBILITY_FLAG__
- if(UNLIKELY((nodeDist > isect->t) ||
- ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if(UNLIKELY((nodeDist > isect->t)))
-#endif
- {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
-
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- if(hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
- traversalStack[stackPtr].dist = -FLT_MAX;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
-#undef NODE_INTERSECT_ROBUST
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h
deleted file mode 100644
index da21ede9e12..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_volume.h
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- *
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
-#ifdef __VISIBILITY_FLAG__
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-#endif
-
- ssef dist;
- int traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
-
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
deleted file mode 100644
index 8a31775fae3..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- *
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
-#ifdef __VISIBILITY_FLAG__
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-#endif
-
- ssef dist;
- int traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
-
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
-# else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect_t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
- float ignore_t = FLT_MAX;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
- }
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- triangle_intersect_precalc(dir, &isect_precalc);
- isect_t = tmax;
- isect_array->t = isect_t;
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT