diff options
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r-- | intern/cycles/kernel/geom/geom.h | 22 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh.h | 415 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_shadow.h | 412 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_subsurface.h | 288 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_traversal.h | 435 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_volume.h | 339 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_volume_all.h | 412 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_motion_triangle.h | 32 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh.h | 147 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_shadow.h | 403 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_subsurface.h | 272 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_traversal.h | 412 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_volume.h | 329 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_volume_all.h | 401 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle.h | 61 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle_intersect.h | 28 |
17 files changed, 62 insertions, 4350 deletions
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index c94a5384d1f..d2c7edb11ea 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -15,27 +15,6 @@ * limitations under the License. */ -/* bottom-most stack entry, indicating the end of traversal */ -#define ENTRYPOINT_SENTINEL 0x76543210 - -/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ -#define BVH_STACK_SIZE 192 -#define BVH_QSTACK_SIZE 384 -#define BVH_NODE_SIZE 4 -#define BVH_NODE_LEAF_SIZE 1 -#define BVH_QNODE_SIZE 7 -#define BVH_QNODE_LEAF_SIZE 1 -#define TRI_NODE_SIZE 3 - -/* silly workaround for float extended precision that happens when compiling - * without sse support on x86, it results in different results for float ops - * that you would otherwise expect to compare correctly */ -#if !defined(__i386__) || defined(__SSE__) -# define NO_EXTENDED_PRECISION -#else -# define NO_EXTENDED_PRECISION volatile -#endif - #include "geom_attribute.h" #include "geom_object.h" #include "geom_triangle.h" @@ -45,5 +24,4 @@ #include "geom_curve.h" #include "geom_volume.h" #include "geom_primitive.h" -#include "geom_bvh.h" diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h deleted file mode 100644 index d0eedd3396a..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* BVH - * - * Bounding volume hierarchy for ray tracing. We compile different variations - * of the same BVH traversal function for faster rendering when some types of - * primitives are not needed, using #includes to work around the lack of - * C++ templates in OpenCL. - * - * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", - * the code has been extended and modified to support more primitives and work - * with CPU/CUDA/OpenCL. */ - -CCL_NAMESPACE_BEGIN - -/* Don't inline intersect functions on GPU, this is faster */ -#ifdef __KERNEL_GPU__ -# define ccl_device_intersect ccl_device_noinline -#else -# define ccl_device_intersect ccl_device_inline -#endif - -/* BVH intersection function variations */ - -#define BVH_INSTANCING 1 -#define BVH_MOTION 2 -#define BVH_HAIR 4 -#define BVH_HAIR_MINIMUM_WIDTH 8 - -#define BVH_NAME_JOIN(x,y) x ## _ ## y -#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y) -#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) - -#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -/* Debugging heleprs */ -#ifdef __KERNEL_DEBUG__ -# define BVH_DEBUG_INIT() \ - do { \ - isect->num_traversal_steps = 0; \ - isect->num_traversed_instances = 0; \ - } while(0) -# define BVH_DEBUG_NEXT_STEP() \ - do { \ - ++isect->num_traversal_steps; \ - } while(0) -# define BVH_DEBUG_NEXT_INSTANCE() \ - do { \ - ++isect->num_traversed_instances; \ - } while(0) -#else /* __KERNEL_DEBUG__ */ -# define BVH_DEBUG_INIT() -# define BVH_DEBUG_NEXT_STEP() -# define BVH_DEBUG_NEXT_INSTANCE() -#endif /* __KERNEL_DEBUG__ */ - - -/* Common QBVH functions. */ -#ifdef __QBVH__ -# include "geom_qbvh.h" -#endif - -/* Regular BVH traversal */ - -#define BVH_FUNCTION_NAME bvh_intersect -#define BVH_FUNCTION_FEATURES 0 -#include "geom_bvh_traversal.h" - -#if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_traversal.h" -#endif - -#if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH -# include "geom_bvh_traversal.h" -#endif - -#if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_traversal.h" -#endif - -#if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION -# include "geom_bvh_traversal.h" -#endif - -/* Subsurface scattering BVH traversal */ - -#if defined(__SUBSURFACE__) -# define BVH_FUNCTION_NAME bvh_intersect_subsurface -# define BVH_FUNCTION_FEATURES 0 -# include "geom_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION -# include "geom_bvh_subsurface.h" -#endif - -/* Volume BVH traversal */ - -#if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES 0 -# include "geom_bvh_volume.h" -#endif - -#if defined(__VOLUME__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_volume.h" -#endif - -#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_volume.h" -#endif - -/* Record all intersections - Shadow BVH traversal */ - -#if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES 0 -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION -# include "geom_bvh_shadow.h" -#endif - -/* Record all intersections - Volume BVH traversal */ - -#if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES 0 -# include "geom_bvh_volume_all.h" -#endif - -#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_volume_all.h" -#endif - -#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_volume_all.h" -#endif - -#undef BVH_FEATURE -#undef BVH_NAME_JOIN -#undef BVH_NAME_EVAL -#undef BVH_FUNCTION_FULL_NAME - -ccl_device_intersect bool scene_intersect(KernelGlobals *kg, - const Ray *ray, - const uint visibility, - Intersection *isect, - uint *lcg_state, - float difl, - float extmax) -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax); -# endif /* __HAIR__ */ - - return bvh_intersect_motion(kg, ray, isect, visibility); - } -#endif /* __OBJECT_MOTION__ */ - -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax); -#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ - -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - - return bvh_intersect(kg, ray, isect, visibility); -#else /* __KERNEL_CPU__ */ - -# ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, ray, isect, visibility); -# else - return bvh_intersect(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ -} - -#ifdef __SUBSURFACE__ -ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_subsurface_motion(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -#endif /* __OBJECT_MOTION__ */ - return bvh_intersect_subsurface(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); -} -#endif - -#ifdef __SHADOW_RECORD_ALL__ -ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits); -# endif /* __HAIR__ */ - - return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits); - } -# endif /* __OBJECT_MOTION__ */ - -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits); -# endif /* __HAIR__ */ - -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); -# endif /* __INSTANCING__ */ - - return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); -} -#endif /* __SHADOW_RECORD_ALL__ */ - -#ifdef __VOLUME__ -ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); - } -# endif /* __OBJECT_MOTION__ */ -# ifdef __KERNEL_CPU__ -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume(kg, ray, isect, visibility); -# else /* __KERNEL_CPU__ */ -# ifdef __INSTANCING__ - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# else - return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ -# endif /* __KERNEL_CPU__ */ -} -#endif /* __VOLUME__ */ - -#ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint max_hits, - const uint visibility) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); -} -#endif /* __VOLUME_RECORD_ALL__ */ - - -/* Ray offset to avoid self intersection. - * - * This function should be used to compute a modified ray start position for - * rays leaving from a surface. */ - -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if(fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x*epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if(fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y*epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if(fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z*epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f*Ng; -#endif -} - -#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__) -/* ToDo: Move to another file? */ -ccl_device int intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection*)a; - const Intersection *isect_b = (const Intersection*)b; - - if(isect_a->t < isect_b->t) - return -1; - else if(isect_a->t > isect_b->t) - return 1; - else - return 0; -} -#endif - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h deleted file mode 100644 index 4005489f77d..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_shadow.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_shadow.h" -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ - /* todo: - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - *num_hits = 0; - isect_array->t = tmax; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif /* __KERNEL_SSE2__ */ - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - bool traverseChild0, traverseChild1; - int nodeAddrChild1; - -#if !defined(__KERNEL_SSE2__) - /* Intersect two child bounding boxes, non-SSE version */ - float t = isect_t; - - /* fetch node data */ - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); - - /* intersect ray against child nodes */ - NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - /* decide which nodes to traverse next */ -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW); - traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW); -# else - traverseChild0 = (c0max >= c0min); - traverseChild1 = (c1max >= c1min); -# endif - -#else // __KERNEL_SSE2__ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - const float4 cnodes = ((float4*)bvh_nodes)[3]; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - /* decide which nodes to traverse next */ -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW); - traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW); -# else - traverseChild0 = (movemask(lrhit) & 1); - traverseChild1 = (movemask(lrhit) & 2); -# endif -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.x); - nodeAddrChild1 = __float_as_int(cnodes.y); - - if(traverseChild0 && traverseChild1) { - /* both children were intersected, push the farther one */ -#if !defined(__KERNEL_SSE2__) - bool closestChild1 = (c1min < c0min); -#else - bool closestChild1 = tminmax[1] < tminmax[0]; -#endif - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* one child was intersected */ - if(traverseChild1) { - nodeAddr = nodeAddrChild1; - } - else if(!traverseChild0) { - /* neither child was intersected */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - while(primAddr < primAddr2) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); - break; - } -#endif -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - else - hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* shadow ray early termination */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ - - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; - -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - - isect_array->t = isect_t; - } - - primAddr++; - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - if(num_hits_in_instance) { - float t_fac; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - /* scale isect->t to adjust for instancing */ - for(int i = 0; i < num_hits_in_instance; i++) - (isect_array-i-1)->t *= t_fac; - } - else { - float ignore_t = FLT_MAX; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - isect_t = tmax; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return false; -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - max_hits, - num_hits); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - max_hits, - num_hits); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h deleted file mode 100644 index 915e9415c93..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_subsurface.h" -#endif - -/* This is a template BVH traversal function for subsurface scattering, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; - - ss_isect->num_hits = 0; - - const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); - if(!(object_flag & SD_TRANSFORM_APPLIED)) { -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); -#else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); -#endif - object = subsurface_object; - } - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do - { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) - { - bool traverseChild0, traverseChild1; - int nodeAddrChild1; - -#if !defined(__KERNEL_SSE2__) - /* Intersect two child bounding boxes, non-SSE version */ - float t = isect_t; - - /* fetch node data */ - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); - - /* intersect ray against child nodes */ - NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - /* decide which nodes to traverse next */ - traverseChild0 = (c0max >= c0min); - traverseChild1 = (c1max >= c1min); - -#else // __KERNEL_SSE2__ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - const float4 cnodes = ((float4*)bvh_nodes)[3]; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - /* decide which nodes to traverse next */ - traverseChild0 = (movemask(lrhit) & 1); - traverseChild1 = (movemask(lrhit) & 2); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.x); - nodeAddrChild1 = __float_as_int(cnodes.y); - - if(traverseChild0 && traverseChild1) { - /* both children were intersected, push the farther one */ -#if !defined(__KERNEL_SSE2__) - bool closestChild1 = (c1min < c0min); -#else - bool closestChild1 = tminmax[1] < tminmax[0]; -#endif - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* one child was intersected */ - if(traverseChild1) { - nodeAddr = nodeAddrChild1; - } - else if(!traverseChild0) { - /* neither child was intersected */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#endif - default: { - break; - } - } - } - } while(nodeAddr != ENTRYPOINT_SENTINEL); - } while(nodeAddr != ENTRYPOINT_SENTINEL); -} - -ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h deleted file mode 100644 index ae919ef3f86..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_traversal.h" -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax -#endif - ) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - BVH_DEBUG_INIT(); - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - bool traverseChild0, traverseChild1; - int nodeAddrChild1; - -#if !defined(__KERNEL_SSE2__) - /* Intersect two child bounding boxes, non-SSE version */ - float t = isect->t; - - /* fetch node data */ - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); - - /* intersect ray against child nodes */ - NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } -# endif - - /* decide which nodes to traverse next */ -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); -# else - traverseChild0 = (c0max >= c0min); - traverseChild1 = (c1max >= c1min); -# endif - -#else // __KERNEL_SSE2__ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - const float4 cnodes = ((float4*)bvh_nodes)[3]; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - float4 *tminmaxview = (float4*)&tminmax; - float &c0min = tminmaxview->x, &c1min = tminmaxview->y; - float &c0max = tminmaxview->z, &c1max = tminmaxview->w; - - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } -# endif - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - /* decide which nodes to traverse next */ -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility); -# else - traverseChild0 = (movemask(lrhit) & 1); - traverseChild1 = (movemask(lrhit) & 2); -# endif -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.x); - nodeAddrChild1 = __float_as_int(cnodes.y); - - if(traverseChild0 && traverseChild1) { - /* both children were intersected, push the farther one */ -#if !defined(__KERNEL_SSE2__) - bool closestChild1 = (c1min < c0min); -#else - bool closestChild1 = tminmax[1] < tminmax[0]; -#endif - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* one child was intersected */ - if(traverseChild1) { - nodeAddr = nodeAddrChild1; - } - else if(!traverseChild0) { - /* neither child was intersected */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - BVH_DEBUG_NEXT_STEP(); - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { - /* shadow ray early termination */ -#if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -#else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -#endif - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { - /* shadow ray early termination */ -# if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -# endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - if(hit) { - /* shadow ray early termination */ -# if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -# endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax -#endif - ) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax -#endif - ); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax -#endif - ); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h deleted file mode 100644 index f3edf85d723..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -#include "geom_qbvh_volume.h" -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - bool traverseChild0, traverseChild1; - int nodeAddrChild1; - -#if !defined(__KERNEL_SSE2__) - /* Intersect two child bounding boxes, non-SSE version */ - float t = isect->t; - - /* fetch node data */ - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); - - /* intersect ray against child nodes */ - NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - /* decide which nodes to traverse next */ - traverseChild0 = (c0max >= c0min); - traverseChild1 = (c1max >= c1min); - -#else // __KERNEL_SSE2__ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - const float4 cnodes = ((float4*)bvh_nodes)[3]; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - /* decide which nodes to traverse next */ - traverseChild0 = (movemask(lrhit) & 1); - traverseChild1 = (movemask(lrhit) & 2); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.x); - nodeAddrChild1 = __float_as_int(cnodes.y); - - if(traverseChild0 && traverseChild1) { - /* both children were intersected, push the farther one */ -#if !defined(__KERNEL_SSE2__) - bool closestChild1 = (c1min < c0min); -#else - bool closestChild1 = tminmax[1] < tminmax[0]; -#endif - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* one child was intersected */ - if(traverseChild1) { - nodeAddr = nodeAddrChild1; - } - else if(!traverseChild0) { - /* neither child was intersected */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); - } - break; - } -#endif - default: { - break; - } - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_MOTION) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h deleted file mode 100644 index ec837212471..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_volume_all.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -#include "geom_qbvh_volume_all.h" -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - uint num_hits = 0; - isect_array->t = tmax; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - bool traverseChild0, traverseChild1; - int nodeAddrChild1; - -#if !defined(__KERNEL_SSE2__) - /* Intersect two child bounding boxes, non-SSE version */ - float t = isect_array->t; - - /* fetch node data */ - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); - - /* intersect ray against child nodes */ - NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; - NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; - NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; - NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; - NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; - NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; - NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - /* decide which nodes to traverse next */ - traverseChild0 = (c0max >= c0min); - traverseChild1 = (c1max >= c1min); - -#else // __KERNEL_SSE2__ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - const float4 cnodes = ((float4*)bvh_nodes)[3]; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - /* decide which nodes to traverse next */ - traverseChild0 = (movemask(lrhit) & 1); - traverseChild1 = (movemask(lrhit) & 2); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.x); - nodeAddrChild1 = __float_as_int(cnodes.y); - - if(traverseChild0 && traverseChild1) { - /* both children were intersected, push the farther one */ -#if !defined(__KERNEL_SSE2__) - bool closestChild1 = (c1min < c0min); -#else - bool closestChild1 = tminmax[1] < tminmax[0]; -#endif - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* one child was intersected */ - if(traverseChild1) { - nodeAddr = nodeAddrChild1; - } - else if(!traverseChild0) { - /* neither child was intersected */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - bool hit; - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -#if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; -# if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -# endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -# if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#endif /* BVH_MOTION */ - default: { - break; - } - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - if(num_hits_in_instance) { - float t_fac; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { - float ignore_t = FLT_MAX; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - isect_t = tmax; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_MOTION) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return num_hits; -} - -ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - max_hits, - visibility); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - max_hits, - visibility); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 8894843997c..292e1bfca0e 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -450,8 +450,8 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect else if(level == 1) { /* the maximum recursion depth is reached. - * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. - * dP* is reversed if necessary.*/ + * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. + * dP* is reversed if necessary.*/ float t = isect->t; float u = 0.0f; float gd = 0.0f; diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index ffe55529110..2fb8e219884 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -47,13 +47,13 @@ ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } -ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3]) +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3]) { if(step == numsteps) { /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); } else { /* center step not store in this array */ @@ -62,19 +62,19 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3 offset += step*numverts; - verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); - verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); - verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); } } -ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3]) +ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3]) { if(step == numsteps) { /* center step: regular vertex location */ - normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); - normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); - normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); + normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); } else { /* center step not stored in this array */ @@ -83,9 +83,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float offset += step*numverts; - normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); - normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); - normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); } } @@ -107,7 +107,7 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i /* fetch vertex coordinates */ float3 next_verts[3]; - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); @@ -259,7 +259,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD /* fetch vertex coordinates */ float3 verts[3], next_verts[3]; - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim))); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h deleted file mode 100644 index 2a2d7822eee..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -struct QBVHStackItem { - int addr; - float dist; -}; - -/* TOOD(sergey): Investigate if using intrinsics helps for both - * stack item swap and float comparison. - */ -ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a, - QBVHStackItem *__restrict b) -{ - QBVHStackItem tmp = *a; - *a = *b; - *b = tmp; -} - -ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, - QBVHStackItem *__restrict s2, - QBVHStackItem *__restrict s3) -{ - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } -} - -ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, - QBVHStackItem *__restrict s2, - QBVHStackItem *__restrict s3, - QBVHStackItem *__restrict s4) -{ - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); } - if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); } - if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } -} - -ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& org_idir, -#else - const sse3f& org, -#endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - ssef *__restrict dist) -{ - const int offset = nodeAddr*BVH_QNODE_SIZE; -#ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z); -#else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z; -#endif - -#ifdef __KERNEL_SSE41__ - const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear)); - const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar)); - const sseb vmask = cast(tNear) > cast(tFar); - int mask = (int)movemask(vmask)^0xf; -#else - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - const sseb vmask = tNear <= tFar; - int mask = (int)movemask(vmask); -#endif - *dist = tNear; - return mask; -} - -ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& P_idir, -#else - const sse3f& P, -#endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - const float difl, - ssef *__restrict dist) -{ - const int offset = nodeAddr*BVH_QNODE_SIZE; -#ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); -#else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; -#endif - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - const sseb vmask = round_down*tNear <= round_up*tFar; - *dist = tNear; - return (int)movemask(vmask); -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h deleted file mode 100644 index edb5b5c78c3..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ - /* TODO(sergey): - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - *num_hits = 0; - isect_array->t = tmax; - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - ssef tnear(0.0f), tfar(tmax); - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - ssef dist; - int traverseChild = qbvh_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#else - org, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } -#endif - - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - while(primAddr < primAddr2) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); - break; - } -#endif -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - else - hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* Shadow ray early termination. */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ - - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; - -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - - isect_array->t = isect_t; - } - - primAddr++; - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - num_hits_in_instance = 0; - isect_array->t = isect_t; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - if(num_hits_in_instance) { - float t_fac; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - - /* scale isect->t to adjust for instancing */ - for(int i = 0; i < num_hits_in_instance; i++) - (isect_array-i-1)->t *= t_fac; - } - else { - float ignore_t = FLT_MAX; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - } - - isect_t = tmax; - isect_array->t = isect_t; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(tmax); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return false; -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h deleted file mode 100644 index 84512a8783c..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for subsurface scattering, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - SSE for hair. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; - - ss_isect->num_hits = 0; - - const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); - if(!(object_flag & SD_TRANSFORM_APPLIED)) { -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); -#else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); -#endif - object = subsurface_object; - } - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return; - } -#endif - - ssef tnear(0.0f), tfar(isect_t); - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - ssef dist; - int traverseChild = qbvh_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#else - org, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* Intersect ray against primitive, */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* Intersect ray against primitive. */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#endif - default: - break; - } - } - } while(nodeAddr != ENTRYPOINT_SENTINEL); - } while(nodeAddr != ENTRYPOINT_SENTINEL); -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h deleted file mode 100644 index 738d08ac6fc..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - ,uint *lcg_state, - float difl, - float extmax -#endif - ) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - traversalStack[0].dist = -FLT_MAX; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - float nodeDist = -FLT_MAX; - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - BVH_DEBUG_INIT(); - - ssef tnear(0.0f), tfar(ray->t); - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - if(UNLIKELY(nodeDist > isect->t)) { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - int traverseChild; - ssef dist; - - BVH_DEBUG_NEXT_STEP(); - -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - traverseChild = qbvh_node_intersect_robust(kg, - tnear, - tfar, -# ifdef __KERNEL_AVX2__ - P_idir4, -# else - org, -# endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ - { - traverseChild = qbvh_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#else - org, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - } - - if(traverseChild != 0) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - float d0 = ((float*)&dist)[r]; - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - nodeDist = d0; - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - nodeDist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - nodeDist = d0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); - -#ifdef __VISIBILITY_FLAG__ - if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0))) -#else - if(UNLIKELY((nodeDist > isect->t))) -#endif - { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - if(hit) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm); -# else - qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - traversalStack[stackPtr].dist = -FLT_MAX; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h deleted file mode 100644 index ab2e530dd20..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_volume.h +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - ssef tnear(0.0f), tfar(ray->t); - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - ssef dist; - int traverseChild = qbvh_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#else - org, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); - } - break; - } -#endif - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h deleted file mode 100644 index 5546471b0e3..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - uint num_hits = 0; - isect_array->t = tmax; - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - ssef tnear(0.0f), tfar(isect_t); - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - ssef dist; - int traverseChild = qbvh_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#else - org, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - bool hit; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -#if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; -# if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -# endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -# if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#endif - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { - float ignore_t = FLT_MAX; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - isect_t = tmax; - isect_array->t = isect_t; - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return num_hits; -} diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 995dfac5b09..0c2351e1d1b 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -27,12 +27,11 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { /* load triangle vertices */ - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); - /* return normal */ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) return normalize(cross(v2 - v0, v1 - v0)); @@ -44,11 +43,10 @@ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) { /* load triangle vertices */ - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); /* compute point */ float t = 1.0f - u - v; @@ -71,11 +69,10 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) { - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); + P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); + P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); } /* Interpolate smooth vertex normal from vertices */ @@ -83,11 +80,10 @@ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3 ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v) { /* load triangle vertices */ - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); return normalize((1.0f - u - v)*n2 + u*n0 + v*n1); } @@ -97,11 +93,10 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv) { /* fetch triangle vertex coordinates */ - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); /* compute derivatives of P w.r.t. uv */ *dPdu = (p0 - p2); @@ -119,11 +114,11 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); - float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); - float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z)); + float f0 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.x); + float f1 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.y); + float f2 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; @@ -162,11 +157,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index b6dfc769012..fc081bda525 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -106,9 +106,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - const float4 tri_a = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+0), - tri_b = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+1), - tri_c = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+2); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); @@ -202,9 +203,10 @@ ccl_device_inline void triangle_intersect_subsurface( const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - const float4 tri_a = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+0), - tri_b = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+1), - tri_c = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+2); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); @@ -324,9 +326,10 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, P = P + D*t; - const float4 tri_a = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+0), - tri_b = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+1), - tri_c = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+2); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -381,9 +384,10 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, P = P + D*t; #ifdef __INTERSECTION_REFINE__ - const float4 tri_a = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+0), - tri_b = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+1), - tri_c = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+2); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); |