git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/bvh/qbvh_traversal.h')

 intern/cycles/kernel/bvh/qbvh_traversal.h | 310
 1 file changed, 175 insertions(+), 135 deletions(-)
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index 24bf85f46c8..f82ff661495 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -55,14 +55,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
- traversalStack[0].dist = -FLT_MAX;
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[0].dist = -FLT_MAX;
/* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
- float nodeDist = -FLT_MAX;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+ float node_dist = -FLT_MAX;
/* Ray parameters in registers. */
float3 P = ray->P;
@@ -117,22 +117,22 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- if(UNLIKELY(nodeDist > isect->t)
+ if(UNLIKELY(node_dist > isect->t)
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0)
#endif
{
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
continue;
}
- int traverseChild;
+ int child_mask;
ssef dist;
BVH_DEBUG_NEXT_STEP();
@@ -144,48 +144,48 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*
* Need to test if doing opposite would be any faster.
*/
- traverseChild = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
+ child_mask = NODE_INTERSECT_ROBUST(kg,
+ tnear,
+ tfar,
# ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
# endif
# if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- difl,
- &dist);
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ difl,
+ &dist);
}
else
#endif /* BVH_HAIR_MINIMUM_WIDTH */
{
- traverseChild = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
}
- if(traverseChild != 0) {
+ if(child_mask != 0) {
float4 cnodes;
/* TODO(sergey): Investigate whether moving cnodes upwards
* gives a speedup (will be different cache pattern but will
@@ -193,20 +193,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
- cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
+ int r = __bscf(child_mask);
float d0 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- nodeDist = d0;
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ node_dist = d0;
continue;
}
@@ -214,26 +214,26 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
+ if(child_mask == 0) {
if(d1 < d0) {
- nodeAddr = c1;
- nodeDist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ node_addr = c1;
+ node_dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
- nodeAddr = c0;
- nodeDist = d0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
+ node_addr = c0;
+ node_dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@@ -241,116 +241,131 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
}
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
}
/* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
#ifdef __VISIBILITY_FLAG__
- if(UNLIKELY((nodeDist > isect->t) ||
+ if(UNLIKELY((node_dist > isect->t) ||
((__float_as_uint(leaf.z) & visibility) == 0)))
#else
- if(UNLIKELY((nodeDist > isect->t)))
+ if(UNLIKELY((node_dist > isect->t)))
#endif
{
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
continue;
}
- int primAddr = __float_as_int(leaf.x);
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- int primAddr2 = __float_as_int(leaf.y);
+ int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(triangle_intersect(kg,
+ &isect_precalc,
+ isect,
+ P,
+ visibility,
+ object,
+ prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
+ }
}
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(motion_triangle_intersect(kg,
+ isect,
+ P,
+ dir,
+ ray->time,
+ visibility,
+ object,
+ prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
+ }
}
}
break;
@@ -359,19 +374,44 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = bvh_cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = bvh_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
if(hit) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
+ }
}
}
break;
@@ -382,12 +422,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
+ qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
@@ -408,21 +448,21 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
- traversalStack[stackPtr].dist = -FLT_MAX;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[stack_ptr].dist = -FLT_MAX;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
BVH_DEBUG_NEXT_INSTANCE();
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@@ -451,12 +491,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
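
The change above is a mechanical cleanup: camelCase locals are renamed to snake_case (nodeAddr -> node_addr, traversalStack -> traversal_stack, traverseChild -> child_mask, and so on), long argument lists are split one-per-line, and single-statement ifs gain braces; the traversal logic itself is untouched. For readers skimming the hunks, the recurring pattern is the one the comments describe: intersect the ray against up to four children, bit-scan the hit mask with __bscf, descend into the nearest hit child and push the remaining ones (sorted by distance) onto the traversal stack. Below is a minimal standalone sketch of the two-children-hit case; it is illustrative only, not the Cycles kernel, and StackItem, STACK_SIZE and the function name are hypothetical stand-ins for QBVHStackItem, BVH_QSTACK_SIZE and the inlined code in the diff.

/* Illustrative sketch, not the Cycles kernel: continue with the nearer of two
 * hit children and defer the farther one on the stack, mirroring the two-hit
 * branch in the diff. StackItem, STACK_SIZE and visit_nearer_push_farther are
 * hypothetical names. */
#include <assert.h>

#define STACK_SIZE 192

typedef struct StackItem {
	int addr;    /* node address to resume at */
	float dist;  /* entry distance, used to reject the pop if a closer hit exists */
} StackItem;

static void visit_nearer_push_farther(int c0, float d0,
                                      int c1, float d1,
                                      StackItem *stack, int *stack_ptr,
                                      int *node_addr, float *node_dist)
{
	if(d1 < d0) {
		/* Child 1 is closer: visit it next, defer child 0. */
		*node_addr = c1;
		*node_dist = d1;
		++(*stack_ptr);
		assert(*stack_ptr < STACK_SIZE);
		stack[*stack_ptr].addr = c0;
		stack[*stack_ptr].dist = d0;
	}
	else {
		/* Child 0 is closer (or equal): visit it next, defer child 1. */
		*node_addr = c0;
		*node_dist = d0;
		++(*stack_ptr);
		assert(*stack_ptr < STACK_SIZE);
		stack[*stack_ptr].addr = c1;
		stack[*stack_ptr].dist = d1;
	}
}

Popping is the mirror image: the entry at stack_ptr is read and stack_ptr is decremented, and because the distance is stored next to the address, the "Pop." blocks in the diff can discard a stale entry cheaply when node_dist > isect->t.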