Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2014-12-25 20:40:02 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2014-12-25 20:40:02 +0300
commitcd095aae139ecbcfdf2103f635eae8d5bc5f3b8e (patch)
tree655ab197b47b076e8903f586ad22c061e5215abe /intern
parent30e3aa1561ef0b2b1fdebc343b628b6bbf6365c5 (diff)
Cycles: Distance optimization for QBVH
This commit implements a heuristic which allows skipping nodes pushed to the stack during intersection if the distance to them is larger than the distance to the current intersection. This should solve the speed regression which I didn't notice in the original QBVH commit (which could have been because I had a WIP version of this patch applied in my local branch). From quick tests, speed seems to be much closer to what it was with the regular BVH. There's still some possible code cleanup, but it will need a bit of assembly code checking, and for now I want to make it so artists can happily use Cycles over the holidays.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/geom/geom_object.h59
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh.h43
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_shadow.h41
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h43
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_traversal.h71
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume.h43
6 files changed, 201 insertions, 99 deletions
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 91edd5863ac..79a56683454 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -391,6 +391,38 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra
*t *= len;
}
+#ifdef __QBVH__
+/* QBVH variant of bvh_instance_push: applies the object's inverse transform
+ * to the ray and rescales both *t and *t1 (FLT_MAX / -FLT_MAX are kept).
+ *
+ * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
+ * so we can avoid having this duplication.
+ */
+ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
+ int object,
+ const Ray *ray,
+ float3 *P,
+ float3 *dir,
+ float3 *idir,
+ float *t,
+ float *t1)
+{
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+
+ *P = transform_point(&tfm, ray->P);
+
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
+
+ if(*t != FLT_MAX)
+ *t *= len;
+
+ if(*t1 != -FLT_MAX)
+ *t1 *= len;
+}
+#endif
+
/* Transorm ray to exit static object in BVH */
ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
@@ -436,6 +468,33 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, c
*t *= len;
}
+#ifdef __QBVH__
+/* QBVH variant of bvh_instance_motion_push: transforms the ray with the itfm
+ * fetched for ray->time, stores the fetched transform in *tfm and rescales
+ * both *t and *t1 (FLT_MAX / -FLT_MAX sentinels are kept).
+ * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
+ * so we can avoid having this duplication.
+ */
+ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, float *t1, Transform *tfm)
+{
+ Transform itfm;
+ *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
+
+ *P = transform_point(&itfm, ray->P);
+
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(&itfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
+
+
+ if(*t != FLT_MAX)
+ *t *= len;
+
+ if(*t1 != -FLT_MAX)
+ *t1 *= len;
+}
+#endif
+
/* Transorm ray to exit motion blurred object in BVH */
ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
index a1dd89c41ca..7a354379bed 100644
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ b/intern/cycles/kernel/geom/geom_qbvh.h
@@ -14,32 +14,31 @@
* limitations under the License.
*/
-ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
- int *__restrict s2,
- int *__restrict s3,
- float *__restrict d1,
- float *__restrict d2,
- float *__restrict d3)
+struct QBVHStackItem {
+ int addr;
+ float dist;
+};
+
+/* TODO(sergey): Investigate if using intrinsics helps here. */
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
+ QBVHStackItem *__restrict s2,
+ QBVHStackItem *__restrict s3)
{
- if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
- if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }
- if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
+ if(s2->dist < s1->dist) { util_swap(s2, s1); }
+ if(s3->dist < s2->dist) { util_swap(s3, s2); }
+ if(s2->dist < s1->dist) { util_swap(s2, s1); }
}
-ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
- int *__restrict s2,
- int *__restrict s3,
- int *__restrict s4,
- float *__restrict d1,
- float *__restrict d2,
- float *__restrict d3,
- float *__restrict d4)
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
+ QBVHStackItem *__restrict s2,
+ QBVHStackItem *__restrict s3,
+ QBVHStackItem *__restrict s4)
{
- if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
- if(*d4 < *d3) { util_swap(s4, s3); util_swap(d4, d3); }
- if(*d3 < *d1) { util_swap(s3, s1); util_swap(d3, d1); }
- if(*d4 < *d2) { util_swap(s4, s2); util_swap(d4, d2); }
- if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }
+ if(s2->dist < s1->dist) { util_swap(s2, s1); }
+ if(s4->dist < s3->dist) { util_swap(s4, s3); }
+ if(s3->dist < s1->dist) { util_swap(s3, s1); }
+ if(s4->dist < s2->dist) { util_swap(s4, s2); }
+ if(s3->dist < s2->dist) { util_swap(s3, s2); }
}
ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
index f8279996450..2d1ad498972 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -39,8 +39,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversalStack[BVH_STACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -128,13 +128,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -143,9 +145,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -155,12 +159,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &d2, &d1, &d0);
- nodeAddr = traversalStack[stackPtr];
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -172,17 +176,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
- traversalStack[stackPtr] = c3;
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3],
- &d3, &d2, &d1, &d0);
+ &traversalStack[stackPtr - 3]);
}
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -197,7 +202,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
#ifdef __VISIBILITY_FLAG__
@@ -315,7 +320,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
@@ -368,7 +373,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index bc43d81f9d3..acb1bbd01c0 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -42,8 +42,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversalStack[BVH_STACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -124,13 +124,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -139,9 +141,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -151,12 +155,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &d2, &d1, &d0);
- nodeAddr = traversalStack[stackPtr];
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -168,17 +172,18 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
- traversalStack[stackPtr] = c3;
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3],
- &d3, &d2, &d1, &d0);
+ &traversalStack[stackPtr - 3]);
}
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -193,7 +198,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Primitive intersection. */
@@ -250,13 +255,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -290,7 +295,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
index 56289900e80..fdb22725ceb 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
@@ -47,12 +47,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversalStack[BVH_STACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ traversalStack[0].dist = -FLT_MAX;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
+ float nodeDist = -FLT_MAX;
/* Ray parameters in registers. */
float3 P = ray->P;
@@ -100,6 +102,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ if(UNLIKELY(nodeDist > isect->t)) {
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ continue;
+ }
+
int traverseChild;
ssef dist;
@@ -152,8 +162,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
+ float d0 = ((float*)&dist)[r];
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
+ nodeDist = d0;
continue;
}
@@ -161,7 +173,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
@@ -169,13 +180,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -184,9 +197,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -196,12 +211,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &d2, &d1, &d0);
- nodeAddr = traversalStack[stackPtr];
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
continue;
}
@@ -213,22 +229,32 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
- traversalStack[stackPtr] = c3;
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3],
- &d3, &d2, &d1, &d0);
+ &traversalStack[stackPtr - 3]);
}
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
+ if(UNLIKELY(nodeDist > isect->t)) {
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
+ --stackPtr;
+ continue;
+ }
+
float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
int primAddr = __float_as_int(leaf.x);
@@ -238,7 +264,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
#ifdef __VISIBILITY_FLAG__
@@ -299,9 +326,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+ qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_tfm);
#else
- bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
#endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
@@ -318,7 +345,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].dist = -FLT_MAX;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
@@ -351,7 +379,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
+ nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h
index 3630436bddc..21466cca92a 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h
@@ -38,8 +38,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversalStack[BVH_STACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -130,13 +130,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -145,9 +147,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
- traversalStack[stackPtr] = c1;
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
- traversalStack[stackPtr] = c0;
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -157,12 +161,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &d2, &d1, &d0);
- nodeAddr = traversalStack[stackPtr];
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -174,17 +178,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
- traversalStack[stackPtr] = c3;
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
++stackPtr;
- traversalStack[stackPtr] = c2;
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3],
- &d3, &d2, &d1, &d0);
+ &traversalStack[stackPtr - 3]);
}
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -199,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Primitive intersection. */
@@ -270,14 +275,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
}
@@ -310,7 +315,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
+ nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */