Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/bvh/bvh.cpp7
-rw-r--r--intern/cycles/kernel/CMakeLists.txt5
-rw-r--r--intern/cycles/kernel/geom/geom.h1
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h5
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h26
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h29
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h37
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h20
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh.h138
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_shadow.h378
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h300
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_traversal.h361
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume.h320
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h7
-rw-r--r--intern/cycles/render/mesh.cpp8
-rw-r--r--intern/cycles/render/scene.h5
17 files changed, 1616 insertions, 33 deletions
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index b591d5973fe..e7141c9ec64 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -479,8 +479,11 @@ void BVH::pack_instances(size_t nodes_size)
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
- if(use_qbvh)
- pack_nodes[pack_nodes_offset + nsize_bbox+1] = bvh_nodes[i + nsize_bbox+1];
+ if(use_qbvh) {
+ memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox+1],
+ &bvh_nodes[i + nsize_bbox+1],
+ sizeof(int4) * (nsize - (nsize_bbox+1)));
+ }
pack_nodes_offset += nsize;
}
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index ca1065f114a..f8d2ee60a3a 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -123,6 +123,11 @@ set(SRC_GEOM_HEADERS
geom/geom_motion_triangle.h
geom/geom_object.h
geom/geom_primitive.h
+ geom/geom_qbvh.h
+ geom/geom_qbvh_shadow.h
+ geom/geom_qbvh_subsurface.h
+ geom/geom_qbvh_traversal.h
+ geom/geom_qbvh_volume.h
geom/geom_triangle.h
geom/geom_triangle_intersect.h
geom/geom_volume.h
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 3a768f37dd9..38fd7858a99 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -21,6 +21,7 @@
/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
#define BVH_STACK_SIZE 192
#define BVH_NODE_SIZE 4
+#define BVH_QNODE_SIZE 7
#define TRI_NODE_SIZE 3
/* silly workaround for float extended precision that happens when compiling
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index a9892679e24..c0eefcd9c7f 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -48,6 +48,11 @@ CCL_NAMESPACE_BEGIN
#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
+/* Common QBVH functions. */
+#ifdef __QBVH__
+#include "geom_qbvh.h"
+#endif
+
/* Regular BVH traversal */
#define BVH_FUNCTION_NAME bvh_intersect
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 4bdfc7478aa..d6056026f24 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+#ifdef __QBVH__
+#include "geom_qbvh_shadow.h"
+#endif
+
/* This is a template BVH traversal function, where various features can be
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
@@ -380,11 +384,23 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- max_hits,
- num_hits);
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect_array,
+ max_hits,
+ num_hits);
+ }
+ else
+#endif
+ {
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect_array,
+ max_hits,
+ num_hits);
+ }
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index 90cbbc08153..ff462142f6f 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+#ifdef __QBVH__
+#include "geom_qbvh_subsurface.h"
+#endif
+
/* This is a template BVH traversal function for subsurface scattering, where
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
@@ -300,12 +304,25 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- subsurface_object,
- lcg_state,
- max_hits);
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect_array,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
+ else
+#endif
+ {
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect_array,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index d48eda5f554..6e5b6ea476e 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+#ifdef __QBVH__
+#include "geom_qbvh_traversal.h"
+#endif
+
/* This is a template BVH traversal function, where various features can be
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
@@ -381,16 +385,33 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
#endif
)
{
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect,
+ visibility
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ , lcg_state,
+ difl,
+ extmax
+#endif
+ );
+ }
+ else
+#endif
+ {
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect,
+ visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
+ , lcg_state,
+ difl,
+ extmax
#endif
- );
+ );
+ }
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index bae90c2a24a..8a25b5dc884 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+#ifdef __QBVH__
+#include "geom_qbvh_volume.h"
+#endif
+
/* This is a template BVH traversal function for volumes, where
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
@@ -314,9 +318,19 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const Ray *ray,
Intersection *isect)
{
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect);
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect);
+ }
+ else
+#endif
+ {
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect);
+ }
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
new file mode 100644
index 00000000000..a1dd89c41ca
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Sort three traversal stack entries by distance with a three-step
+ * compare-and-swap network.
+ *
+ * s1..s3 point at stack slots and d1..d3 at the matching distances. Whenever
+ * a pair of distances is out of order, both the slot pointers and the
+ * distance pointers are handed to util_swap. Assuming util_swap exchanges the
+ * pointed-to values (it is defined outside this file -- confirm), the result
+ * is *d1 <= *d2 <= *d3 with the stack slots reordered the same way.
+ */
+ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
+                                       int *__restrict s2,
+                                       int *__restrict s3,
+                                       float *__restrict d1,
+                                       float *__restrict d2,
+                                       float *__restrict d3)
+{
+	if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }  /* order pair (1, 2) */
+	if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }  /* largest distance ends up in 3 */
+	if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }  /* order pair (1, 2) again */
+}
+/* Four-entry overload of qbvh_stack_sort: sorts four traversal stack entries
+ * by distance with five compare-and-swap steps on the pairs
+ * (1, 2), (3, 4), (1, 3), (2, 4), (2, 3).
+ *
+ * s1..s4 point at stack slots and d1..d4 at the matching distances; slots and
+ * distances are always swapped together. Assuming util_swap exchanges the
+ * pointed-to values (it is defined outside this file -- confirm), the result
+ * is *d1 <= *d2 <= *d3 <= *d4.
+ */
+ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
+                                       int *__restrict s2,
+                                       int *__restrict s3,
+                                       int *__restrict s4,
+                                       float *__restrict d1,
+                                       float *__restrict d2,
+                                       float *__restrict d3,
+                                       float *__restrict d4)
+{
+	if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }  /* order pair (1, 2) */
+	if(*d4 < *d3) { util_swap(s4, s3); util_swap(d4, d3); }  /* order pair (3, 4) */
+	if(*d3 < *d1) { util_swap(s3, s1); util_swap(d3, d1); }  /* overall smallest ends up in 1 */
+	if(*d4 < *d2) { util_swap(s4, s2); util_swap(d4, d2); }  /* overall largest ends up in 4 */
+	if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }  /* order the two middle entries */
+}
+/* Test a ray against the four child bounds of one QBVH node, four lanes at a
+ * time.
+ *
+ * kg:              kernel globals; not referenced directly in this body
+ *                  (presumably consumed by the kernel_tex_fetch_ssef macro --
+ *                  confirm).
+ * tnear, tfar:     ray interval the per-axis distances are clamped against.
+ * org_idir / org:  with __KERNEL_AVX2__ the ray origin already multiplied by
+ *                  the inverse direction, otherwise the plain ray origin.
+ * idir:            inverse ray direction.
+ * near_x .. far_z: row offsets inside the node selecting the near and far
+ *                  bound for each axis. The traversal functions in this patch
+ *                  pass 0/1 for x, 2/3 for y and 4/5 for z, depending on the
+ *                  sign of the inverse direction.
+ * nodeAddr:        node index; rows are read starting at
+ *                  nodeAddr*BVH_QNODE_SIZE in __bvh_nodes.
+ * dist:            output, receives the entry distance tNear of all four
+ *                  lanes, including lanes that are not reported as hit.
+ *
+ * Returns a 4-bit mask with one bit per lane, set when that lane's interval
+ * is not empty (tNear <= tFar).
+ */
+ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
+                                          const ssef& tnear,
+                                          const ssef& tfar,
+#ifdef __KERNEL_AVX2__
+                                          const sse3f& org_idir,
+#else
+                                          const sse3f& org,
+#endif
+                                          const sse3f& idir,
+                                          const int near_x,
+                                          const int near_y,
+                                          const int near_z,
+                                          const int far_x,
+                                          const int far_y,
+                                          const int far_z,
+                                          const int nodeAddr,
+                                          ssef *__restrict dist)
+{
+	const int offset = nodeAddr*BVH_QNODE_SIZE;  /* first row of this node */
+#ifdef __KERNEL_AVX2__
+	/* msub presumably evaluates bound*idir - org_idir in one fused step -- confirm. */
+	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
+	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
+	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
+	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
+	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
+	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
+#else
+	/* Distance along the ray to each bounding plane: (bound - origin) * idir. */
+	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
+	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
+	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
+	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
+	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
+	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
+#endif
+
+#ifdef __KERNEL_SSE41__
+	/* NOTE(review): maxi/mini and the cast() compare look like integer
+	 * operations on the float bit patterns, which would only match the float
+	 * ordering for non-negative values -- confirm against the SIMD helpers.
+	 */
+	const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
+	const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
+	const sseb vmask = cast(tNear) > cast(tFar);
+	int mask = (int)movemask(vmask)^0xf;  /* vmask flags misses; flip the low 4 bits to get hits */
+#else
+	const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
+	const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
+	const sseb vmask = tNear <= tFar;
+	int mask = (int)movemask(vmask);
+#endif
+	*dist = tNear;
+	return mask;
+}
+/* Variant of the four-lane QBVH node test that widens the accepted interval
+ * by the factor difl: a lane counts as hit when
+ * (1 - difl)*tNear <= (1 + difl)*tFar.
+ *
+ * kg:              kernel globals; not referenced directly in this body
+ *                  (presumably consumed by the kernel_tex_fetch_ssef macro --
+ *                  confirm).
+ * tnear, tfar:     ray interval the per-axis distances are clamped against.
+ * P_idir / P:      with __KERNEL_AVX2__ the ray origin already multiplied by
+ *                  the inverse direction, otherwise the plain ray origin.
+ * idir:            inverse ray direction.
+ * near_x .. far_z: row offsets inside the node selecting the near and far
+ *                  bound for each axis.
+ * nodeAddr:        node index; rows are read starting at
+ *                  nodeAddr*BVH_QNODE_SIZE in __bvh_nodes.
+ * difl:            relative tolerance applied to both ends of the interval.
+ * dist:            output, receives the unscaled entry distance tNear of all
+ *                  four lanes.
+ *
+ * Returns a 4-bit mask with one bit per lane that passes the widened test.
+ * Unlike qbvh_node_intersect there is no __KERNEL_SSE41__ specific path here.
+ */
+ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
+                                                 const ssef& tnear,
+                                                 const ssef& tfar,
+#ifdef __KERNEL_AVX2__
+                                                 const sse3f& P_idir,
+#else
+                                                 const sse3f& P,
+#endif
+                                                 const sse3f& idir,
+                                                 const int near_x,
+                                                 const int near_y,
+                                                 const int near_z,
+                                                 const int far_x,
+                                                 const int far_y,
+                                                 const int far_z,
+                                                 const int nodeAddr,
+                                                 const float difl,
+                                                 ssef *__restrict dist)
+{
+	const int offset = nodeAddr*BVH_QNODE_SIZE;  /* first row of this node */
+#ifdef __KERNEL_AVX2__
+	/* msub presumably evaluates bound*idir - P_idir in one fused step -- confirm. */
+	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
+	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
+	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
+	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
+	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
+	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
+#else
+	/* Distance along the ray to each bounding plane: (bound - origin) * idir. */
+	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
+	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
+	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
+	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
+	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
+	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
+#endif
+
+	const float round_down = 1.0f - difl;  /* scales the entry distance */
+	const float round_up = 1.0f + difl;  /* scales the exit distance */
+	const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
+	const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
+	const sseb vmask = round_down*tNear <= round_up*tFar;
+	*dist = tNear;
+	return (int)movemask(vmask);
+}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
new file mode 100644
index 00000000000..f8279996450
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -0,0 +1,378 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function, where various features can be
+ * enabled/disabled. This way we can compile optimized versions for each case
+ * without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ * QBVH shadow traversal: walks the 4-wide BVH with a shadow ray and records
+ * hits on surfaces whose shader has SD_HAS_TRANSPARENT_SHADOW set.
+ *
+ * kg:          kernel globals, passed on to the node test, the primitive
+ *              intersection helpers and the instance helpers.
+ * ray:         ray to trace; P, D and t are read here, and time is forwarded
+ *              for motion and curve primitives.
+ * isect_array: caller-provided array that receives one entry per recorded
+ *              hit.
+ *              NOTE(review): the entry following the last recorded hit is
+ *              still written, so the array appears to need room for
+ *              max_hits + 1 entries -- confirm against callers.
+ * max_hits:    once *num_hits equals this value, the next hit is treated as
+ *              fully blocking.
+ * num_hits:    output, number of hits recorded; reset to 0 on entry.
+ *
+ * Returns true when the ray is fully blocked (a hit on a shader without
+ * transparent shadows, or a hit after max_hits entries were recorded) and
+ * false when the traversal finishes without such a hit.
+ */
+
+ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+                                             const Ray *ray,
+                                             Intersection *isect_array,
+                                             const uint max_hits,
+                                             uint *num_hits)
+{
+	/* TODO(sergey):
+	 *  - Likely and unlikely for if() statements.
+	 *  - Test restrict attribute for pointers.
+	 */
+
+	/* Traversal stack; slot 0 holds the sentinel that ends the traversal. */
+	int traversalStack[BVH_STACK_SIZE];
+	traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+	/* Traversal variables in registers. */
+	int stackPtr = 0;
+	int nodeAddr = kernel_data.bvh.root;
+
+	/* Ray parameters in registers. */
+	const float tmax = ray->t;
+	float3 P = ray->P;
+	float3 dir = bvh_clamp_direction(ray->D);
+	float3 idir = bvh_inverse_direction(dir);
+	int object = OBJECT_NONE;
+	float isect_t = tmax;
+
+#if BVH_FEATURE(BVH_MOTION)
+	Transform ob_tfm;
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+	int num_hits_in_instance = 0;
+#endif
+
+	*num_hits = 0;
+	isect_array->t = tmax;
+
+	/* Four-lane copies of the ray interval, inverse direction and origin. */
+	ssef tnear(0.0f), tfar(tmax);
+	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+	float3 P_idir = P*idir;
+	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+	sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+	/* Offsets to select the side that becomes the lower or upper bound. */
+	int near_x, near_y, near_z;
+	int far_x, far_y, far_z;
+
+	if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+	if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+	if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+	IsectPrecalc isect_precalc;
+	triangle_intersect_precalc(dir, &isect_precalc);
+
+	/* Traversal loop. */
+	do {
+		do {
+			/* Traverse internal nodes. */
+			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+				ssef dist;
+				int traverseChild = qbvh_node_intersect(kg,
+				                                        tnear,
+				                                        tfar,
+#ifdef __KERNEL_AVX2__
+				                                        P_idir4,
+#else
+				                                        org,
+#endif
+				                                        idir4,
+				                                        near_x, near_y, near_z,
+				                                        far_x, far_y, far_z,
+				                                        nodeAddr,
+				                                        &dist);
+
+				if(traverseChild != 0) {
+					/* Row 6 of the node holds the four child addresses. */
+					float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+
+					/* One child is hit, continue with that child.
+					 *
+					 * __bscf is expected to return the index of a set bit
+					 * and clear it from traverseChild; the checks against
+					 * zero below rely on that.
+					 */
+					int r = __bscf(traverseChild);
+					if(traverseChild == 0) {
+						nodeAddr = __float_as_int(cnodes[r]);
+						continue;
+					}
+
+					/* Two children are hit, push far child, and continue with
+					 * closer child.
+					 */
+					int c0 = __float_as_int(cnodes[r]);
+					float d0 = ((float*)&dist)[r];
+					r = __bscf(traverseChild);
+					int c1 = __float_as_int(cnodes[r]);
+					float d1 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						if(d1 < d0) {
+							nodeAddr = c1;
+							++stackPtr;
+							traversalStack[stackPtr] = c0;
+							continue;
+						}
+						else {
+							nodeAddr = c0;
+							++stackPtr;
+							traversalStack[stackPtr] = c1;
+							continue;
+						}
+					}
+
+					/* Here starts the slow path for 3 or 4 hit children. We push
+					 * all nodes onto the stack to sort them there.
+					 */
+					++stackPtr;
+					traversalStack[stackPtr] = c1;
+					++stackPtr;
+					traversalStack[stackPtr] = c0;
+
+					/* Three children are hit, push all onto stack and sort 3
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c2 = __float_as_int(cnodes[r]);
+					float d2 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						++stackPtr;
+						traversalStack[stackPtr] = c2;
+						/* The top of the stack is passed first, so the
+						 * closest child ends up on top and is popped next.
+						 */
+						qbvh_stack_sort(&traversalStack[stackPtr],
+						                &traversalStack[stackPtr - 1],
+						                &traversalStack[stackPtr - 2],
+						                &d2, &d1, &d0);
+						nodeAddr = traversalStack[stackPtr];
+						--stackPtr;
+						continue;
+					}
+
+					/* Four children are hit, push all onto stack and sort 4
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c3 = __float_as_int(cnodes[r]);
+					float d3 = ((float*)&dist)[r];
+					++stackPtr;
+					traversalStack[stackPtr] = c3;
+					++stackPtr;
+					traversalStack[stackPtr] = c2;
+					qbvh_stack_sort(&traversalStack[stackPtr],
+					                &traversalStack[stackPtr - 1],
+					                &traversalStack[stackPtr - 2],
+					                &traversalStack[stackPtr - 3],
+					                &d3, &d2, &d1, &d0);
+				}
+
+				/* Pop the next node: reached when no child was hit, or after
+				 * the four-child sort above.
+				 */
+				nodeAddr = traversalStack[stackPtr];
+				--stackPtr;
+			}
+
+			/* If node is leaf, fetch triangle list. */
+			if(nodeAddr < 0) {
+				float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+				int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+				/* A negative primAddr marks an instance, handled in the else branch. */
+				if(primAddr >= 0) {
+#endif
+					int primAddr2 = __float_as_int(leaf.y);  /* end of the primitive range (exclusive) */
+
+					/* Pop. */
+					nodeAddr = traversalStack[stackPtr];
+					--stackPtr;
+
+#ifdef __VISIBILITY_FLAG__
+					/* Skip leaves whose flags in leaf.z lack PATH_RAY_SHADOW. */
+					if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
+						continue;
+					}
+#endif
+
+					/* Primitive intersection. */
+					while(primAddr < primAddr2) {
+						bool hit;
+						uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+						/* todo: specialized intersect functions which don't fill in
+						 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+						 * might give a few % performance improvement */
+
+						switch(type & PRIMITIVE_ALL) {
+							case PRIMITIVE_TRIANGLE: {
+								hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+								break;
+							}
+#if BVH_FEATURE(BVH_MOTION)
+							case PRIMITIVE_MOTION_TRIANGLE: {
+								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
+								break;
+							}
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+							case PRIMITIVE_CURVE:
+							case PRIMITIVE_MOTION_CURVE: {
+								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+									hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+								else
+									hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+								break;
+							}
+#endif
+							default: {
+								hit = false;
+								break;
+							}
+						}
+
+						/* Shadow ray early termination. */
+						if(hit) {
+							/* detect if this surface has a shader with transparent shadows */
+
+							/* todo: optimize so primitive visibility flag indicates if
+							 * the primitive has a transparent shadow shader? */
+							int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+							int shader = 0;
+
+#ifdef __HAIR__
+							if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+#endif
+							{
+								shader = kernel_tex_fetch(__tri_shader, prim);
+							}
+#ifdef __HAIR__
+							else {
+								float4 str = kernel_tex_fetch(__curves, prim);
+								shader = __float_as_int(str.z);
+							}
+#endif
+							int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+
+							/* if no transparent shadows, all light is blocked */
+							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+								return true;
+							}
+							/* if maximum number of hits reached, block all light */
+							else if(*num_hits == max_hits) {
+								return true;
+							}
+
+							/* move on to next entry in intersections array */
+							isect_array++;
+							(*num_hits)++;
+#if BVH_FEATURE(BVH_INSTANCING)
+							num_hits_in_instance++;
+#endif
+
+							/* Reset the distance of the fresh entry before it is used. */
+							isect_array->t = isect_t;
+						}
+
+						primAddr++;
+					}
+				}
+#if BVH_FEATURE(BVH_INSTANCING)
+				else {
+					/* Instance push. */
+					object = kernel_tex_fetch(__prim_object, -primAddr-1);
+
+#if BVH_FEATURE(BVH_MOTION)
+					bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+					bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+					num_hits_in_instance = 0;
+					isect_array->t = isect_t;
+
+					/* P, dir, idir and isect_t were passed by pointer above;
+					 * rebuild everything derived from them.
+					 */
+					if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+					if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+					if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+					tfar = ssef(isect_t);
+					idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+					P_idir = P*idir;
+					P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+					org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+					triangle_intersect_precalc(dir, &isect_precalc);
+
+					/* The sentinel marks where the instance traversal ends. */
+					++stackPtr;
+					traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+					nodeAddr = kernel_tex_fetch(__object_node, object);
+
+				}
+			}
+#endif  /* FEATURE(BVH_INSTANCING) */
+		} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+		/* A sentinel was reached while entries remain on the stack: the
+		 * traversal of an instance has finished.
+		 */
+		if(stackPtr >= 0) {
+			kernel_assert(object != OBJECT_NONE);
+
+			if(num_hits_in_instance) {
+				float t_fac;
+
+#if BVH_FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+
+				/* scale isect->t to adjust for instancing */
+				for(int i = 0; i < num_hits_in_instance; i++)
+					(isect_array-i-1)->t *= t_fac;
+			}
+			else {
+				float ignore_t = FLT_MAX;
+
+#if BVH_FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+			}
+
+			isect_t = tmax;
+			isect_array->t = isect_t;
+
+			/* Rebuild the values derived from P, dir and idir after the pop. */
+			if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+			if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+			if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+			tfar = ssef(tmax);
+			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+			P_idir = P*idir;
+			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+			org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+			triangle_intersect_precalc(dir, &isect_precalc);
+
+			object = OBJECT_NONE;
+			nodeAddr = traversalStack[stackPtr];
+			--stackPtr;
+		}
+#endif  /* FEATURE(BVH_INSTANCING) */
+	} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+	return false;
+}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
new file mode 100644
index 00000000000..bc43d81f9d3
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -0,0 +1,300 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for subsurface scattering, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_MOTION: motion blur rendering
+ *
+ * QBVH subsurface traversal: walks the 4-wide BVH and intersects the ray only
+ * with triangles and motion triangles that belong to subsurface_object.
+ *
+ * kg:                kernel globals, passed on to the node test, the
+ *                    primitive intersection helpers and the instance helpers.
+ * ray:               ray to trace; P, D and t are read here, and time is
+ *                    forwarded for motion triangles.
+ * isect_array:       caller-provided array handed to the intersection
+ *                    helpers, which record the hits.
+ * subsurface_object: object whose primitives are considered; primitives and
+ *                    instances of any other object are skipped.
+ * lcg_state:         forwarded unchanged to the intersection helpers.
+ * max_hits:          forwarded unchanged to the intersection helpers.
+ *
+ * Returns num_hits, a counter that starts at 0 and is only modified by the
+ * intersection helpers through the pointer passed to them.
+ */
+
+ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+                                             const Ray *ray,
+                                             Intersection *isect_array,
+                                             int subsurface_object,
+                                             uint *lcg_state,
+                                             int max_hits)
+{
+	/* TODO(sergey):
+	 * - Test if pushing distance on the stack helps (for non shadow rays).
+	 * - Separate version for shadow rays.
+	 * - Likely and unlikely for if() statements.
+	 * - SSE for hair.
+	 * - Test restrict attribute for pointers.
+	 */
+
+	/* Traversal stack; slot 0 holds the sentinel that ends the traversal. */
+	int traversalStack[BVH_STACK_SIZE];
+	traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+	/* Traversal variables in registers. */
+	int stackPtr = 0;
+	int nodeAddr = kernel_data.bvh.root;
+
+	/* Ray parameters in registers. */
+	float3 P = ray->P;
+	float3 dir = bvh_clamp_direction(ray->D);
+	float3 idir = bvh_inverse_direction(dir);
+	int object = OBJECT_NONE;
+	float isect_t = ray->t;
+	uint num_hits = 0;
+
+#if BVH_FEATURE(BVH_MOTION)
+	Transform ob_tfm;
+#endif
+
+	/* Four-lane copies of the ray interval, inverse direction and origin. */
+	ssef tnear(0.0f), tfar(isect_t);
+	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+	float3 P_idir = P*idir;
+	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+	sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+	/* Offsets to select the side that becomes the lower or upper bound. */
+	int near_x, near_y, near_z;
+	int far_x, far_y, far_z;
+
+	if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+	if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+	if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+	IsectPrecalc isect_precalc;
+	triangle_intersect_precalc(dir, &isect_precalc);
+
+	/* Traversal loop. */
+	do {
+		do {
+			/* Traverse internal nodes. */
+			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+				ssef dist;
+				int traverseChild = qbvh_node_intersect(kg,
+				                                        tnear,
+				                                        tfar,
+#ifdef __KERNEL_AVX2__
+				                                        P_idir4,
+#else
+				                                        org,
+#endif
+				                                        idir4,
+				                                        near_x, near_y, near_z,
+				                                        far_x, far_y, far_z,
+				                                        nodeAddr,
+				                                        &dist);
+
+				if(traverseChild != 0) {
+					/* Row 6 of the node holds the four child addresses. */
+					float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+
+					/* One child is hit, continue with that child.
+					 *
+					 * __bscf is expected to return the index of a set bit
+					 * and clear it from traverseChild; the checks against
+					 * zero below rely on that.
+					 */
+					int r = __bscf(traverseChild);
+					if(traverseChild == 0) {
+						nodeAddr = __float_as_int(cnodes[r]);
+						continue;
+					}
+
+					/* Two children are hit, push far child, and continue with
+					 * closer child.
+					 */
+					int c0 = __float_as_int(cnodes[r]);
+					float d0 = ((float*)&dist)[r];
+					r = __bscf(traverseChild);
+					int c1 = __float_as_int(cnodes[r]);
+					float d1 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						if(d1 < d0) {
+							nodeAddr = c1;
+							++stackPtr;
+							traversalStack[stackPtr] = c0;
+							continue;
+						}
+						else {
+							nodeAddr = c0;
+							++stackPtr;
+							traversalStack[stackPtr] = c1;
+							continue;
+						}
+					}
+
+					/* Here starts the slow path for 3 or 4 hit children. We push
+					 * all nodes onto the stack to sort them there.
+					 */
+					++stackPtr;
+					traversalStack[stackPtr] = c1;
+					++stackPtr;
+					traversalStack[stackPtr] = c0;
+
+					/* Three children are hit, push all onto stack and sort 3
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c2 = __float_as_int(cnodes[r]);
+					float d2 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						++stackPtr;
+						traversalStack[stackPtr] = c2;
+						/* The top of the stack is passed first, so the
+						 * closest child ends up on top and is popped next.
+						 */
+						qbvh_stack_sort(&traversalStack[stackPtr],
+						                &traversalStack[stackPtr - 1],
+						                &traversalStack[stackPtr - 2],
+						                &d2, &d1, &d0);
+						nodeAddr = traversalStack[stackPtr];
+						--stackPtr;
+						continue;
+					}
+
+					/* Four children are hit, push all onto stack and sort 4
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c3 = __float_as_int(cnodes[r]);
+					float d3 = ((float*)&dist)[r];
+					++stackPtr;
+					traversalStack[stackPtr] = c3;
+					++stackPtr;
+					traversalStack[stackPtr] = c2;
+					qbvh_stack_sort(&traversalStack[stackPtr],
+					                &traversalStack[stackPtr - 1],
+					                &traversalStack[stackPtr - 2],
+					                &traversalStack[stackPtr - 3],
+					                &d3, &d2, &d1, &d0);
+				}
+
+				/* Pop the next node: reached when no child was hit, or after
+				 * the four-child sort above.
+				 */
+				nodeAddr = traversalStack[stackPtr];
+				--stackPtr;
+			}
+
+			/* If node is leaf, fetch triangle list. */
+			if(nodeAddr < 0) {
+				float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+				int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+				/* A negative primAddr marks an instance, handled in the else branch. */
+				if(primAddr >= 0) {
+#endif
+					int primAddr2 = __float_as_int(leaf.y);  /* end of the primitive range (exclusive) */
+
+					/* Pop. */
+					nodeAddr = traversalStack[stackPtr];
+					--stackPtr;
+
+					/* Primitive intersection. */
+					for(; primAddr < primAddr2; primAddr++) {
+						/* only primitives from the same object */
+						uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+
+						if(tri_object != subsurface_object)
+							continue;
+
+						/* Intersect ray against primitive */
+						uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+						switch(type & PRIMITIVE_ALL) {
+							case PRIMITIVE_TRIANGLE: {
+								triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+								break;
+							}
+#if BVH_FEATURE(BVH_MOTION)
+							case PRIMITIVE_MOTION_TRIANGLE: {
+								motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+								break;
+							}
+#endif
+							default: {
+								/* Other primitive types are ignored here. */
+								break;
+							}
+						}
+					}
+				}
+#if BVH_FEATURE(BVH_INSTANCING)
+				else {
+					/* Instance push. Only instances of subsurface_object are entered. */
+					if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
+						object = subsurface_object;
+
+#if BVH_FEATURE(BVH_MOTION)
+						bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+						bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+						/* P, dir, idir and isect_t were passed by pointer
+						 * above; rebuild everything derived from them.
+						 */
+						if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+						if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+						if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+						tfar = ssef(isect_t);
+						idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+						P_idir = P*idir;
+						P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+						org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+						triangle_intersect_precalc(dir, &isect_precalc);
+
+						/* The sentinel marks where the instance traversal ends. */
+						++stackPtr;
+						traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+						nodeAddr = kernel_tex_fetch(__object_node, object);
+					}
+					else {
+						/* Pop. */
+						nodeAddr = traversalStack[stackPtr];
+						--stackPtr;
+					}
+
+				}
+			}
+#endif  /* FEATURE(BVH_INSTANCING) */
+		} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+		/* A sentinel was reached while entries remain on the stack: the
+		 * traversal of an instance has finished.
+		 */
+		if(stackPtr >= 0) {
+			kernel_assert(object != OBJECT_NONE);
+
+			/* Instance pop. */
+#if BVH_FEATURE(BVH_MOTION)
+			bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+			bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+			/* Rebuild the values derived from P, dir, idir and isect_t after the pop. */
+			if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+			if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+			if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+			tfar = ssef(isect_t);
+			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+			P_idir = P*idir;
+			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+			org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+			triangle_intersect_precalc(dir, &isect_precalc);
+
+			object = OBJECT_NONE;
+			nodeAddr = traversalStack[stackPtr];
+			--stackPtr;
+		}
+#endif  /* FEATURE(BVH_INSTANCING) */
+	} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+	return num_hits;
+}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
new file mode 100644
index 00000000000..56289900e80
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
@@ -0,0 +1,361 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function, where various features can be
+ * enabled/disabled. This way we can compile optimized versions for each case
+ * without new features slowing things down.
+ * Feature flags, each tested at compile time through BVH_FEATURE():
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
+ * BVH_MOTION: motion blur rendering
+ * Returns true when isect->prim is no longer PRIM_NONE once traversal ends.
+ */
+
+ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Handed on to the node and primitive tests. */
+ const Ray *ray, /* Ray to trace: P, D and t are read here, time by the motion/curve tests. */
+ Intersection *isect, /* Hit record: reset below, then passed to the primitive tests to fill in. */
+ const uint visibility /* Mask checked against the leaf flags and forwarded to the primitive tests. */
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ ,uint *lcg_state, /* Forwarded to the curve intersection helpers. */
+ float difl, /* Non-zero selects the robust node test; also forwarded to the curve helpers. */
+ float extmax /* Forwarded to the curve intersection helpers. */
+#endif
+ )
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps (for non shadow rays).
+ * - Separate version for shadow rays.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack; slot 0 holds the sentinel whose pop ends the whole traversal. */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* Traversal state: index of the top stack slot and the node being visited. */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* Working copy of the ray; pointers to it go to the instance push/pop helpers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE; /* Stays OBJECT_NONE while outside an instance. */
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm; /* Shared between the motion push and pop helpers. */
+#endif
+
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps = 0;
+#endif
+
+ ssef tnear(0.0f), tfar(ray->t); /* Ray interval for the node tests; tfar is refreshed from isect->t. */
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir; /* AVX2 builds give the node test P*idir instead of the origin. */
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc); /* Redone after every instance push/pop below. */
+
+ /* Traversal loop. */
+ do { /* Outer loop: resumes after an instance has been left. */
+ do { /* Inner loop: runs until a sentinel is popped. */
+ /* Traverse internal nodes (non-negative addresses; negative ones are leaves). */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ int traverseChild; /* Presumably a bit mask of hit children, consumed via __bscf() - confirm. */
+ ssef dist; /* Written by the node test; read per child index below. */
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps++;
+#endif
+
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ if(difl != 0.0f) {
+ /* NOTE: We extend all the child BB instead of fetching
+ * and checking visibility flags for each of them.
+ *
+ * Need to test if doing opposite would be any faster.
+ */
+ traverseChild = qbvh_node_intersect_robust(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#else
+ org,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ difl,
+ &dist);
+ }
+ else
+#endif
+ {
+ traverseChild = qbvh_node_intersect(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#else
+ org,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
+ }
+
+ if(traverseChild != 0) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); /* Child addresses as int bits. */
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(traverseChild);
+ if(traverseChild == 0) {
+ nodeAddr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float*)&dist)[r];
+ r = __bscf(traverseChild);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ if(d1 < d0) {
+ nodeAddr = c1;
+ ++stackPtr;
+ traversalStack[stackPtr] = c0;
+ continue;
+ }
+ else {
+ nodeAddr = c0;
+ ++stackPtr;
+ traversalStack[stackPtr] = c1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there, in hit order so each slot lines up with its distance.
+ */
+ ++stackPtr;
+ traversalStack[stackPtr] = c0; /* Deepest slot: handed to the sort together with d0. */
+ ++stackPtr;
+ traversalStack[stackPtr] = c1; /* Next slot up: handed to the sort together with d1. */
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ ++stackPtr;
+ traversalStack[stackPtr] = c2;
+ qbvh_stack_sort(&traversalStack[stackPtr], /* Slots c2, c1, c0 ... */
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &d2, &d1, &d0); /* ... and their distances in the same order (assumes positional pairing). */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stackPtr;
+ traversalStack[stackPtr] = c2; /* Handed to the sort together with d2. */
+ ++stackPtr;
+ traversalStack[stackPtr] = c3; /* Top slot: handed to the sort together with d3. */
+ qbvh_stack_sort(&traversalStack[stackPtr], /* Slots c3, c2, c1, c0 ... */
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &traversalStack[stackPtr - 3],
+ &d3, &d2, &d1, &d0); /* ... and their distances in the same order. */
+ }
+
+ nodeAddr = traversalStack[stackPtr]; /* Pop: reached when nothing was hit or after the 4-child sort. */
+ --stackPtr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); /* Leaf address is -(index)-1. */
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) { /* A negative primAddr marks an instance, see the else branch. */
+#endif
+ int primAddr2 = __float_as_int(leaf.y); /* End of the primitive range (exclusive). */
+
+ /* Pop the next node first, so the visibility `continue` below can skip this leaf. */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(leaf.z) & visibility) == 0) {
+ continue;
+ }
+#endif
+
+ /* Primitive intersection. */
+ while(primAddr < primAddr2) {
+ bool hit;
+ uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ hit = triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ hit = motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ else
+ hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ break;
+ }
+#endif
+ default: {
+ hit = false; /* Primitive types not compiled into this variant never hit. */
+ break;
+ }
+ }
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps++;
+#endif
+
+ /* Shadow ray early termination. */
+ if(hit) {
+ tfar = ssef(isect->t); /* Later node tests use the updated isect->t as far bound. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) /* Exact match, not a mask test. */
+ return true;
+ }
+
+ primAddr++;
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ ++stackPtr;
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; /* Popping this marks the end of the instance. */
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) { /* Sentinel popped with entries left: an instance is being left. */
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr]; /* Resume with the node saved below the instance sentinel. */
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return (isect->prim != PRIM_NONE); /* prim is still PRIM_NONE unless a primitive test changed it. */
+}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h
new file mode 100644
index 00000000000..3630436bddc
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h
@@ -0,0 +1,320 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ * Feature flags, each tested at compile time through BVH_FEATURE():
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ * Only primitives whose object has SD_OBJECT_HAS_VOLUME set are tested; returns isect->prim != PRIM_NONE.
+ */
+
+ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Handed on to the node and primitive tests. */
+ const Ray *ray, /* Ray to trace: P, D and t are read here, time by the motion/curve tests. */
+ Intersection *isect) /* Hit record: reset below, then passed to the primitive tests to fill in. */
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack; slot 0 holds the sentinel whose pop ends the whole traversal. */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* Traversal state: index of the top stack slot and the node being visited. */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* Working copy of the ray; pointers to it go to the instance push/pop helpers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE; /* Stays OBJECT_NONE while outside an instance. */
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY; /* Fixed here; only forwarded to the primitive tests. */
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm; /* Shared between the motion push and pop helpers. */
+#endif
+
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+ ssef tnear(0.0f), tfar(ray->t); /* Ray interval for the node tests; tfar changes only on instance push/pop. */
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir; /* AVX2 builds give the node test P*idir instead of the origin. */
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc); /* Redone after every instance push/pop below. */
+
+ /* Traversal loop. */
+ do { /* Outer loop: resumes after an instance has been left. */
+ do { /* Inner loop: runs until a sentinel is popped. */
+ /* Traverse internal nodes (non-negative addresses; negative ones are leaves). */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps++; /* NOTE(review): never zeroed in this function - confirm the caller initializes it. */
+#endif
+
+ ssef dist; /* Written by the node test; read per child index below. */
+ int traverseChild = qbvh_node_intersect(kg, /* Presumably a bit mask of hit children - confirm. */
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#else
+ org,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
+
+ if(traverseChild != 0) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); /* Child addresses as int bits. */
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(traverseChild);
+ if(traverseChild == 0) {
+ nodeAddr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float*)&dist)[r];
+ r = __bscf(traverseChild);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ if(d1 < d0) {
+ nodeAddr = c1;
+ ++stackPtr;
+ traversalStack[stackPtr] = c0;
+ continue;
+ }
+ else {
+ nodeAddr = c0;
+ ++stackPtr;
+ traversalStack[stackPtr] = c1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there, in hit order so each slot lines up with its distance.
+ */
+ ++stackPtr;
+ traversalStack[stackPtr] = c0; /* Deepest slot: handed to the sort together with d0. */
+ ++stackPtr;
+ traversalStack[stackPtr] = c1; /* Next slot up: handed to the sort together with d1. */
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ ++stackPtr;
+ traversalStack[stackPtr] = c2;
+ qbvh_stack_sort(&traversalStack[stackPtr], /* Slots c2, c1, c0 ... */
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &d2, &d1, &d0); /* ... and their distances in the same order (assumes positional pairing). */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stackPtr;
+ traversalStack[stackPtr] = c2; /* Handed to the sort together with d2. */
+ ++stackPtr;
+ traversalStack[stackPtr] = c3; /* Top slot: handed to the sort together with d3. */
+ qbvh_stack_sort(&traversalStack[stackPtr], /* Slots c3, c2, c1, c0 ... */
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &traversalStack[stackPtr - 3],
+ &d3, &d2, &d1, &d0); /* ... and their distances in the same order. */
+ }
+
+ nodeAddr = traversalStack[stackPtr]; /* Pop: reached when nothing was hit or after the 4-child sort. */
+ --stackPtr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); /* Leaf address is -(index)-1. */
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) { /* A negative primAddr marks an instance, see the else branch. */
+#endif
+ int primAddr2 = __float_as_int(leaf.y); /* End of the primitive range (exclusive). */
+
+ /* Pop the next node before testing this leaf's primitives. */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+ /* Primitive intersection. */
+ for(; primAddr < primAddr2; primAddr++) {
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+
+ /* Intersect ray against primitive; the helpers' return values are not used here. */
+ uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ break;
+ }
+#endif
+ default: {
+ break;
+ }
+ }
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) { /* Instances without a volume are skipped in the else branch. */
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ ++stackPtr;
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; /* Popping this marks the end of the instance. */
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop: leave the instance untouched and carry on with the next stacked node. */
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) { /* Sentinel popped with entries left: an instance is being left. */
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr]; /* Resume with the node saved below the instance sentinel. */
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return (isect->prim != PRIM_NONE); /* prim is still PRIM_NONE unless a primitive test changed it. */
+}
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 2f0b78ea073..8140a3b7725 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -52,7 +52,6 @@ template<typename T> struct texture {
return data[index];
}
-#if 0
ccl_always_inline ssef fetch_ssef(int index)
{
kernel_assert(index >= 0 && index < width);
@@ -64,7 +63,6 @@ template<typename T> struct texture {
kernel_assert(index >= 0 && index < width);
return ((ssei*)data)[index];
}
-#endif
T *data;
int width;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 460ca7b68eb..1bc5cf1fc32 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -57,6 +57,9 @@ CCL_NAMESPACE_BEGIN
/* device capabilities */
#ifdef __KERNEL_CPU__
+#ifdef __KERNEL_SSE2__
+# define __QBVH__
+#endif
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
@@ -947,8 +950,8 @@ typedef struct KernelBVH {
int have_motion;
int have_curves;
int have_instancing;
-
- int pad1, pad2, pad3;
+ int use_qbvh;
+ int pad1, pad2;
} KernelBVH;
typedef enum CurveFlag {
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 6137f7d4fdc..f8671db18dd 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -1027,6 +1027,7 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
}
dscene->data.bvh.root = pack.root_index;
+ dscene->data.bvh.use_qbvh = scene->params.use_qbvh;
}
void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
@@ -1094,7 +1095,12 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update) {
- pool.push(function_bind(&Mesh::compute_bvh, mesh, &scene->params, &progress, i, num_bvh));
+ pool.push(function_bind(&Mesh::compute_bvh,
+ mesh,
+ &scene->params,
+ &progress,
+ i,
+ num_bvh));
i++;
}
}
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 5d205225d97..51324edd8ff 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -26,6 +26,7 @@
#include "util_param.h"
#include "util_string.h"
+#include "util_system.h"
#include "util_thread.h"
#include "util_types.h"
#include "util_vector.h"
@@ -135,11 +136,7 @@ public:
bvh_type = BVH_DYNAMIC;
use_bvh_cache = false;
use_bvh_spatial_split = false;
-#ifdef __QBVH__
- use_qbvh = true;
-#else
use_qbvh = false;
-#endif
persistent_data = false;
}