diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh.h | 76 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_volume_all.h | 454 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_volume_all.h | 446 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 15 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_volume.h | 63 |
7 files changed, 1043 insertions, 14 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index fd690234bc1..83b3450fc1c 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -119,6 +119,7 @@ set(SRC_GEOM_HEADERS geom/geom_bvh_subsurface.h geom/geom_bvh_traversal.h geom/geom_bvh_volume.h + geom/geom_bvh_volume_all.h geom/geom_curve.h geom/geom_motion_curve.h geom/geom_motion_triangle.h @@ -129,6 +130,7 @@ set(SRC_GEOM_HEADERS geom/geom_qbvh_subsurface.h geom/geom_qbvh_traversal.h geom/geom_qbvh_volume.h + geom/geom_qbvh_volume_all.h geom/geom_triangle.h geom/geom_triangle_intersect.h geom/geom_volume.h diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index c0eefcd9c7f..c2610c7d92c 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -179,6 +179,38 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_volume.h" #endif +/* Record all BVH intersection for volumes */ + +#if defined(__VOLUME_RECORD_ALL__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion +#define 
BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "geom_bvh_volume_all.h" +#endif + #undef BVH_FEATURE #undef BVH_NAME_JOIN #undef BVH_NAME_EVAL @@ -330,6 +362,37 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, } #endif +#ifdef __VOLUME_RECORD_ALL__ +ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint max_hits) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits); +#endif /* __INSTANCING__ */ + + return bvh_intersect_volume_all(kg, ray, isect, max_hits); +} +#endif + /* Ray offset to avoid self intersection. 
* @@ -384,5 +447,18 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) #endif } +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection*)a; + const Intersection *isect_b = (const Intersection*)b; + + if(isect_a->t < isect_b->t) + return -1; + else if(isect_a->t > isect_b->t) + return 1; + else + return 0; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h new file mode 100644 index 00000000000..b6db36f4b17 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh_volume_all.h @@ -0,0 +1,454 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +#include "geom_qbvh_volume_all.h" +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. 
+ * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect 
two child bounding boxes, non-SSE version */ + float t = isect_array->t; + + /* fetch node data */ + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = 
((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + /* decide which nodes to traverse next */ + traverseChild0 = (movemask(lrhit) & 1); + traverseChild1 = (movemask(lrhit) & 2); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + bool closestChild1 = tminmax[1] < tminmax[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { 
+ case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +#else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array.
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + if(hit) { + /* Move on to next entry in intersections array.
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#endif + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg,
object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + isect_t = tmax; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_MOTION) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h new file mode 100644 index 00000000000..d5131919944 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h @@ -0,0 +1,446 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return 0; /* return type is a hit count (uint), not a bool */ + } +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + + ssef tnear(0.0f), tfar(isect_t); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + int traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child.
+ */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. 
+ */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + bool hit; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +#else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array.
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + if(hit) { + /* Move on to next entry in intersections array.
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + isect_array->t = isect_t; /* next slot is only touched while num_hits < max_hits */ + } + } + break; + } +#endif + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop.
*/ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ + if(num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + isect_t = tmax; /* restore the ray's own limit before tfar is rebuilt from it */ + isect_array->t = isect_t; + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 8923fcebee5..d7c4fa02bcf 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h
@@ -39,19 +39,6 @@ CCL_NAMESPACE_BEGIN * This is CPU only because of qsort, and malloc or high stack space usage to * record all these intersections. */ -ccl_device_noinline int shadow_intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection*)a; - const Intersection *isect_b = (const Intersection*)b; - - if(isect_a->t < isect_b->t) - return -1; - else if(isect_a->t > isect_b->t) - return 1; - else - return 0; -} - #define STACK_MAX_HITS 64 ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) @@ -95,7 +82,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * PathState ps = *state; #endif - qsort(hits, num_hits, sizeof(Intersection), shadow_intersections_compare); + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); for(int hit = 0; hit < num_hits; hit++, isect++) { /* adjust intersection distance for moving ray forward */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index f4f2e22edaa..b948f7de2f4 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -72,6 +72,7 @@ CCL_NAMESPACE_BEGIN #define __VOLUME_DECOUPLED__ #define __VOLUME_SCATTER__ #define __SHADOW_RECORD_ALL__ +#define __VOLUME_RECORD_ALL__ #endif #ifdef __KERNEL_CUDA__ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 32c7e4eef09..e06568457c6 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -993,6 +993,48 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, volume_ray.t = FLT_MAX; int stack_index = 0, enclosed_index = 0; + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2*VOLUME_STACK_SIZE]; + uint num_hits = scene_intersect_volume_all(kg, + &volume_ray, + hits, + 2*VOLUME_STACK_SIZE); + if(num_hits > 0) { + int enclosed_volumes[VOLUME_STACK_SIZE]; + Intersection *isect = hits; + 
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for(uint hit = 0; hit < num_hits; ++hit, ++isect) { + ShaderData sd; + shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0); + if(sd.flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + bool is_enclosed = false; + for(int i = 0; i < enclosed_index; ++i) { + if(enclosed_volumes[i] == sd.object) { + is_enclosed = true; + break; + } + } + if(is_enclosed == false) { + stack[stack_index].object = sd.object; + stack[stack_index].shader = sd.shader; + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = sd.object; + } + } + } +#else int enclosed_volumes[VOLUME_STACK_SIZE]; int step = 0; @@ -1035,6 +1077,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, volume_ray.P = ray_offset(sd.P, -sd.Ng); ++step; } +#endif /* stack_index of 0 means quick checks outside of the kernel gave false * positive, nothing to worry about, just we've wasted quite a few of * ticks just to come into conclusion that camera is in the air. 
@@ -1105,6 +1148,25 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, kernel_assert(kernel_data.integrator.use_volumes); Ray volume_ray = *ray; + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2*VOLUME_STACK_SIZE]; + uint num_hits = scene_intersect_volume_all(kg, + &volume_ray, + hits, + 2*VOLUME_STACK_SIZE); + if(num_hits > 0) { + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for(uint hit = 0; hit < num_hits; ++hit, ++isect) { + ShaderData sd; + shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0); + kernel_volume_stack_enter_exit(kg, &sd, stack); + } + } +#else Intersection isect; int step = 0; while(step < 2 * VOLUME_STACK_SIZE && @@ -1119,6 +1181,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, volume_ray.t -= sd.ray_length; ++step; } +#endif } #endif |