git.blender.org/blender.git
author     Sergey Sharybin <sergey.vfx@gmail.com>  2016-07-11 13:28:45 +0300
committer  Sergey Sharybin <sergey.vfx@gmail.com>  2016-07-11 14:58:47 +0300
commit     4355603790712032e89fa4da6d8ce7f3ede62b4f (patch)
tree       146102d7c52475a2c334e7b2a5a0e5b61a3b8153 /intern/cycles/kernel/bvh/qbvh_traversal.h
parent     c58ae20f6ce2af2a2b71d477917d2a272e47260f (diff)
Cycles: Move BVH kernel files to own directory
BVH traversal is not really geometry code, and we have quite a few traversal variants now. It makes sense to keep them separate for the sake of source structure clarity.
Diffstat (limited to 'intern/cycles/kernel/bvh/qbvh_traversal.h')
-rw-r--r--  intern/cycles/kernel/bvh/qbvh_traversal.h  465
1 file changed, 465 insertions, 0 deletions
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
new file mode 100644
index 00000000000..24bf85f46c8
--- /dev/null
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -0,0 +1,465 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function, where various features can be
+ * enabled/disabled. This way we can compile optimized versions for each case
+ * without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
+ * BVH_MOTION: motion blur rendering
+ */
+
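+/* Annotation: a minimal sketch of how a feature-templated header like this
+ * is typically instantiated by a caller; the caller-side macro values below
+ * are illustrative assumptions, not part of this file:
+ *
+ *   #define BVH_FUNCTION_NAME bvh_intersect
+ *   #define BVH_FUNCTION_FEATURES 0
+ *   #include "kernel/bvh/qbvh_traversal.h"
+ */
+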
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
+#endif
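+
+/* With hair enabled the tree may contain unaligned nodes, which is why the
+ * full node intersection routines are selected above; otherwise the cheaper
+ * aligned-only variants suffice. */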
+
+ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint visibility
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ , uint *lcg_state,
+ float difl,
+ float extmax
+#endif
+ )
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps (for non shadow rays).
+ * - Separate version for shadow rays.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ traversalStack[0].dist = -FLT_MAX;
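+ /* The sentinel marks the bottom of the stack; popping it terminates
+ * the traversal loops below. */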
+
+ /* Traversal variables in registers. */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+ float nodeDist = -FLT_MAX;
+
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
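+ /* The clamp above keeps near-zero direction components away from zero,
+ * so taking the reciprocal for the slab tests cannot divide by zero. */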
+ int object = OBJECT_NONE;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_itfm;
+#endif
+
+#ifndef __KERNEL_SSE41__
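+ /* Annotation (assumption): the SSE4.1 path is presumably NaN-safe in its
+ * min/max selection, while this fallback is not, hence the explicit
+ * finiteness guard on the ray origin. */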
+ if(!isfinite(P.x)) {
+ return false;
+ }
+#endif
+
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+ BVH_DEBUG_INIT();
+
+ ssef tnear(0.0f), tfar(ray->t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir;
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
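+
+ /* Each scalar above is splatted across all SIMD lanes so one ray can be
+ * tested against the four children of a QBVH node at once. With AVX2,
+ * precomputing P*idir lets the slab test use fused multiply-subtract
+ * (the FMA motivation is an assumption of this annotation). */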
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
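+ /* The node packs child bounds per axis at offsets 0..5 (lo/hi for x, y,
+ * z), so the sign of each direction component picks the near plane. */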
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
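+ /* Precompute the per-ray terms of the triangle test once, so the leaf
+ * loops below can reuse them for every candidate triangle. */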
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+
+ if(UNLIKELY(nodeDist > isect->t)
+#ifdef __VISIBILITY_FLAG__
+ || (__float_as_uint(inodes.x) & visibility) == 0
+#endif
+ )
+ {
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ continue;
+ }
+
+ int traverseChild;
+ ssef dist;
+
+ BVH_DEBUG_NEXT_STEP();
+
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ if(difl != 0.0f) {
+ /* NOTE: We extend all the child bounding boxes instead of fetching
+ * and checking visibility flags for each of them.
+ *
+ * Need to test if doing the opposite would be any faster.
+ */
+ traverseChild = NODE_INTERSECT_ROBUST(kg,
+ tnear,
+ tfar,
+# ifdef __KERNEL_AVX2__
+ P_idir4,
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+# endif
+# if BVH_FEATURE(BVH_HAIR)
+ dir4,
+# endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ difl,
+ &dist);
+ }
+ else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+ {
+ traverseChild = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
+ }
+
+ if(traverseChild != 0) {
+ float4 cnodes;
+ /* TODO(sergey): Investigate whether moving cnodes upwards
+ * gives a speedup (will be a different cache pattern but will
+ * avoid the extra check here).
+ */
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ }
+
+ /* One child is hit, continue with that child. */
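+ /* __bscf returns the index of the lowest set bit and clears it; each
+ * set bit in traverseChild corresponds to one intersected child. */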
+ int r = __bscf(traverseChild);
+ float d0 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ nodeAddr = __float_as_int(cnodes[r]);
+ nodeDist = d0;
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ r = __bscf(traverseChild);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ if(d1 < d0) {
+ nodeAddr = c1;
+ nodeDist = d1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+ continue;
+ }
+ else {
+ nodeAddr = c0;
+ nodeDist = d0;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &traversalStack[stackPtr - 3]);
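+ /* The sort leaves the nearest child on top of the stack, so it is
+ * popped first below. */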
+ }
+
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(nodeAddr < 0) {
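+ /* Negative addresses encode leaf nodes: -nodeAddr-1 is the index into
+ * the packed leaf-node array fetched below. */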
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
+
+#ifdef __VISIBILITY_FLAG__
+ if(UNLIKELY((nodeDist > isect->t) ||
+ ((__float_as_uint(leaf.z) & visibility) == 0)))
+#else
+ if(UNLIKELY((nodeDist > isect->t)))
+#endif
+ {
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ continue;
+ }
+
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ int primAddr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
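+ /* The leaf stores the primitive range [primAddr, primAddr2) in
+ * leaf.x/leaf.y and the primitive type in leaf.w; leaf.z holds the
+ * visibility flags tested above. */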
+
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+
+ /* Primitive intersection. */
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
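+ /* Shrink the SIMD far distance so later node tests cull anything
+ * behind the closest hit found so far. */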
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_MOTION) */
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ bool hit;
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ else
+ hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ if(hit) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
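+ /* A negative primAddr encodes an instance. The push below transforms
+ * the ray into object space and rescales isect->t to match. */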
+
+# if BVH_FEATURE(BVH_MOTION)
+ qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
+# else
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
+# endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].dist = -FLT_MAX;
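+
+ /* A fresh sentinel bounds the instance subtree: once it is popped the
+ * inner loop exits and the instance pop below restores world space. */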
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+
+ BVH_DEBUG_NEXT_INSTANCE();
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+# if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+# else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+# endif
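+
+ /* The pop restores the world-space ray, so the derived SIMD state is
+ * recomputed below exactly as it was set up before the instance push. */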
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return (isect->prim != PRIM_NONE);
+}
+
+#undef NODE_INTERSECT
+#undef NODE_INTERSECT_ROBUST