Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2018-02-14 13:23:30 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2018-08-29 16:03:09 +0300
commit73f20560529457ea177cb93e8e8eaaf44a589643 (patch)
tree45ea2ebad9adabcedd7833629421909ede9f6fb5 /intern/cycles/bvh
parent66f8a4c07e8a5fc166579101933264b8425a7cd1 (diff)
Cycles: Add BVH8 and packeted triangle intersection
This is an initial implementation of BVH8 optimization structure and packated triangle intersection. The aim is to get faster ray to scene intersection checks. Scene BVH4 BVH8 barbershop_interior 10:24.94 10:10.74 bmw27 02:41.25 02:38.83 classroom 08:16.49 07:56.15 fishy_cat 04:24.56 04:17.29 koro 06:03.06 06:01.45 pavillon_barcelona 09:21.26 09:02.98 victor 23:39.65 22:53.71 As memory goes, peak usage raises by about 4.7% in a complex scenes. Note that BVH8 is disabled when using OSL, this is because OSL kernel does not get per-microarchitecture optimizations and hence always considers BVH3 is used. Original BVH8 patch from Anton Gavrikov. Batched triangles intersection from Victoria Zhislina. Extra work and tests and fixes from Maxym Dmytrychenko.
Diffstat (limited to 'intern/cycles/bvh')
-rw-r--r--intern/cycles/bvh/CMakeLists.txt2
-rw-r--r--intern/cycles/bvh/bvh.cpp91
-rw-r--r--intern/cycles/bvh/bvh.h8
-rw-r--r--intern/cycles/bvh/bvh2.cpp11
-rw-r--r--intern/cycles/bvh/bvh4.cpp29
-rw-r--r--intern/cycles/bvh/bvh8.cpp515
-rw-r--r--intern/cycles/bvh/bvh8.h97
-rw-r--r--intern/cycles/bvh/bvh_node.cpp49
-rw-r--r--intern/cycles/bvh/bvh_node.h2
9 files changed, 755 insertions, 49 deletions
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index b8171e7f70d..fcd28572fdf 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -10,6 +10,7 @@ set(SRC
bvh.cpp
bvh2.cpp
bvh4.cpp
+ bvh8.cpp
bvh_binning.cpp
bvh_build.cpp
bvh_node.cpp
@@ -22,6 +23,7 @@ set(SRC_HEADERS
bvh.h
bvh2.h
bvh4.h
+ bvh8.h
bvh_binning.h
bvh_build.h
bvh_node.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index b524ca07d8d..bc73a3ad264 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -22,6 +22,7 @@
#include "bvh/bvh2.h"
#include "bvh/bvh4.h"
+#include "bvh/bvh8.h"
#include "bvh/bvh_build.h"
#include "bvh/bvh_node.h"
@@ -38,6 +39,7 @@ const char *bvh_layout_name(BVHLayout layout)
switch(layout) {
case BVH_LAYOUT_BVH2: return "BVH2";
case BVH_LAYOUT_BVH4: return "BVH4";
+ case BVH_LAYOUT_BVH8: return "BVH8";
case BVH_LAYOUT_NONE: return "NONE";
case BVH_LAYOUT_ALL: return "ALL";
}
@@ -92,6 +94,8 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
return new BVH2(params, objects);
case BVH_LAYOUT_BVH4:
return new BVH4(params, objects);
+ case BVH_LAYOUT_BVH8:
+ return new BVH8(params, objects);
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:
break;
@@ -215,6 +219,38 @@ void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
}
}
visibility |= ob->visibility_for_tracing();
+
+ }
+}
+
+bool BVH::leaf_check(const BVHNode *node, BVH_TYPE bvh)
+{
+ if(node->is_leaf()) {
+ return node->is_unaligned;
+ }
+ else {
+ return node_is_unaligned(node, bvh);
+ }
+}
+
+bool BVH::node_is_unaligned(const BVHNode *node, BVH_TYPE bvh)
+{
+ const BVHNode *node0 = node->get_child(0);
+ const BVHNode *node1 = node->get_child(1);
+
+ switch(bvh) {
+ case bvh2:
+ return node0->is_unaligned || node1->is_unaligned;
+ break;
+ case bvh4:
+ return leaf_check(node0, bvh2) || leaf_check(node1, bvh2);
+ break;
+ case bvh8:
+ return leaf_check(node0, bvh4) || leaf_check(node1, bvh4);
+ break;
+ default:
+ assert(0);
+ return false;
}
}
@@ -291,8 +327,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
* BVH's are stored in global arrays. This function merges them into the
* top level BVH, adjusting indexes and offsets where appropriate.
*/
- /* TODO(sergey): This code needs adjustment for wider BVH than 4. */
const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
+ const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
/* Adjust primitive index to point to the triangle in the global array, for
* meshes with transform applied and already in the top level BVH.
@@ -469,14 +505,26 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
for(size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if(bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
- nsize = use_qbvh
- ? BVH_UNALIGNED_QNODE_SIZE
- : BVH_UNALIGNED_NODE_SIZE;
- nsize_bbox = (use_qbvh)? 13: 0;
+ if(use_obvh) {
+ nsize = BVH_UNALIGNED_ONODE_SIZE;
+ nsize_bbox = BVH_UNALIGNED_ONODE_SIZE-1;
+ }
+ else {
+ nsize = use_qbvh
+ ? BVH_UNALIGNED_QNODE_SIZE
+ : BVH_UNALIGNED_NODE_SIZE;
+ nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE-1 : 0;
+ }
}
else {
- nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
- nsize_bbox = (use_qbvh)? 7: 0;
+ if(use_obvh) {
+ nsize = BVH_ONODE_SIZE;
+ nsize_bbox = BVH_ONODE_SIZE-1;
+ }
+ else {
+ nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
+ nsize_bbox = (use_qbvh)? BVH_QNODE_SIZE-1 : 0;
+ }
}
memcpy(pack_nodes + pack_nodes_offset,
@@ -485,16 +533,29 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
-
- data.z += (data.z < 0)? -noffset_leaf: noffset;
- data.w += (data.w < 0)? -noffset_leaf: noffset;
-
- if(use_qbvh) {
- data.x += (data.x < 0)? -noffset_leaf: noffset;
- data.y += (data.y < 0)? -noffset_leaf: noffset;
+ int4 data1 = bvh_nodes[i + nsize_bbox-1];
+ if(use_obvh) {
+ data.z += (data.z < 0) ? -noffset_leaf : noffset;
+ data.w += (data.w < 0) ? -noffset_leaf : noffset;
+ data.x += (data.x < 0) ? -noffset_leaf : noffset;
+ data.y += (data.y < 0) ? -noffset_leaf : noffset;
+ data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
+ data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
+ data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
+ data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
+ }
+ else {
+ data.z += (data.z < 0) ? -noffset_leaf : noffset;
+ data.w += (data.w < 0) ? -noffset_leaf : noffset;
+ if(use_qbvh) {
+ data.x += (data.x < 0)? -noffset_leaf: noffset;
+ data.y += (data.y < 0)? -noffset_leaf: noffset;
+ }
}
-
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
+ if(use_obvh) {
+ pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
+ }
/* Usually this copies nothing, but we better
* be prepared for possible node size extension.
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 6a82f915692..86be0bae4be 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -73,6 +73,12 @@ struct PackedBVH {
}
};
+enum BVH_TYPE {
+ bvh2,
+ bvh4,
+ bvh8
+};
+
/* BVH */
class BVH
@@ -93,6 +99,8 @@ protected:
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox& bbox, uint& visibility);
+ static __forceinline bool leaf_check(const BVHNode *node, BVH_TYPE bvh);
+ static bool node_is_unaligned(const BVHNode *node, BVH_TYPE bvh);
/* triangles and strands */
void pack_primitives();
diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp
index 9d89d2b6afb..4a423c16559 100644
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -25,13 +25,6 @@
CCL_NAMESPACE_BEGIN
-static bool node_bvh_is_unaligned(const BVHNode *node)
-{
- const BVHNode *node0 = node->get_child(0),
- *node1 = node->get_child(1);
- return node0->is_unaligned || node1->is_unaligned;
-}
-
BVH2::BVH2(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
{
@@ -195,7 +188,7 @@ void BVH2::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += node_bvh_is_unaligned(root)
+ nextNodeIdx += node_is_unaligned(root, bvh2)
? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
}
@@ -218,7 +211,7 @@ void BVH2::pack_nodes(const BVHNode *root)
}
else {
idx[i] = nextNodeIdx;
- nextNodeIdx += node_bvh_is_unaligned(e.node->get_child(i))
+ nextNodeIdx += node_is_unaligned(e.node->get_child(i), bvh2)
? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
}
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
index 4faf47af7bb..a449587e607 100644
--- a/intern/cycles/bvh/bvh4.cpp
+++ b/intern/cycles/bvh/bvh4.cpp
@@ -30,27 +30,6 @@ CCL_NAMESPACE_BEGIN
* Perhaps we can merge nodes in actual tree and make our
* life easier all over the place.
*/
-static bool node_qbvh_is_unaligned(const BVHNode *node)
-{
- const BVHNode *node0 = node->get_child(0),
- *node1 = node->get_child(1);
- bool has_unaligned = false;
- if(node0->is_leaf()) {
- has_unaligned |= node0->is_unaligned;
- }
- else {
- has_unaligned |= node0->get_child(0)->is_unaligned;
- has_unaligned |= node0->get_child(1)->is_unaligned;
- }
- if(node1->is_leaf()) {
- has_unaligned |= node1->is_unaligned;
- }
- else {
- has_unaligned |= node1->get_child(0)->is_unaligned;
- has_unaligned |= node1->get_child(1)->is_unaligned;
- }
- return has_unaligned;
-}
BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
@@ -304,7 +283,7 @@ void BVH4::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += node_qbvh_is_unaligned(root)
+ nextNodeIdx += node_is_unaligned(root, bvh4)
? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
}
@@ -348,7 +327,7 @@ void BVH4::pack_nodes(const BVHNode *root)
}
else {
idx = nextNodeIdx;
- nextNodeIdx += node_qbvh_is_unaligned(nodes[i])
+ nextNodeIdx += node_is_unaligned(nodes[i], bvh4)
? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
}
@@ -438,7 +417,7 @@ void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
visibility,
0.0f,
1.0f,
- 4);
+ num_nodes);
}
else {
pack_aligned_node(idx,
@@ -447,7 +426,7 @@ void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
visibility,
0.0f,
1.0f,
- 4);
+ num_nodes);
}
}
}
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
new file mode 100644
index 00000000000..e8b6726602c
--- /dev/null
+++ b/intern/cycles/bvh/bvh8.cpp
@@ -0,0 +1,515 @@
+/*
+Copyright (c) 2017, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "bvh/bvh8.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "bvh/bvh_node.h"
+#include "bvh/bvh_unaligned.h"
+
+CCL_NAMESPACE_BEGIN
+
+BVH8::BVH8(const BVHParams& params_, const vector<Object*>& objects_)
+: BVH(params_, objects_)
+{
+}
+
+void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
+{
+ float4 data[BVH_ONODE_LEAF_SIZE];
+ memset(data, 0, sizeof(data));
+ if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
+ /* object */
+ data[0].x = __int_as_float(~(leaf->lo));
+ data[0].y = __int_as_float(0);
+ }
+ else {
+ /* triangle */
+ data[0].x = __int_as_float(leaf->lo);
+ data[0].y = __int_as_float(leaf->hi);
+ }
+ data[0].z = __uint_as_float(leaf->visibility);
+ if(leaf->num_triangles() != 0) {
+ data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
+ }
+
+ memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
+}
+
+void BVH8::pack_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ bool has_unaligned = false;
+ /* Check whether we have to create unaligned node or all nodes are aligned
+ * and we can cut some corner here.
+ */
+ if(params.use_unaligned_nodes) {
+ for(int i = 0; i < num; i++) {
+ if(en[i].node->is_unaligned) {
+ has_unaligned = true;
+ break;
+ }
+ }
+ }
+ if(has_unaligned) {
+ /* There's no unaligned children, pack into AABB node. */
+ pack_unaligned_inner(e, en, num);
+ }
+ else {
+ /* Create unaligned node with orientation transform for each of the
+ * children.
+ */
+ pack_aligned_inner(e, en, num);
+ }
+}
+
+void BVH8::pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ BoundBox bounds[8];
+ int child[8];
+ for(int i = 0; i < num; ++i) {
+ bounds[i] = en[i].node->bounds;
+ child[i] = en[i].encodeIdx();
+ }
+ pack_aligned_node(e.idx,
+ bounds,
+ child,
+ e.node->visibility,
+ e.node->time_from,
+ e.node->time_to,
+ num);
+}
+
+void BVH8::pack_aligned_node(int idx,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num)
+{
+ float8 data[8];
+ memset(data, 0, sizeof(data));
+
+ data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
+ data[0].b = time_from;
+ data[0].c = time_to;
+ for(int i = 0; i < num; i++) {
+ float3 bb_min = bounds[i].min;
+ float3 bb_max = bounds[i].max;
+
+ data[1][i] = bb_min.x;
+ data[2][i] = bb_max.x;
+ data[3][i] = bb_min.y;
+ data[4][i] = bb_max.y;
+ data[5][i] = bb_min.z;
+ data[6][i] = bb_max.z;
+
+ data[7][i] = __int_as_float(child[i]);
+ }
+
+ for(int i = num; i < 8; i++) {
+ /* We store BB which would never be recorded as intersection
+ * so kernel might safely assume there are always 4 child nodes.
+ */
+ data[1][i] = FLT_MAX;
+ data[2][i] = -FLT_MAX;
+
+ data[3][i] = FLT_MAX;
+ data[4][i] = -FLT_MAX;
+
+ data[5][i] = FLT_MAX;
+ data[6][i] = -FLT_MAX;
+
+ data[7][i] = __int_as_float(0);
+ }
+ memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_ONODE_SIZE);
+}
+
+void BVH8::pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ Transform aligned_space[8];
+ BoundBox bounds[8];
+ int child[8];
+ for(int i = 0; i < num; ++i) {
+ aligned_space[i] = en[i].node->get_aligned_space();
+ bounds[i] = en[i].node->bounds;
+ child[i] = en[i].encodeIdx();
+ }
+ pack_unaligned_node(e.idx,
+ aligned_space,
+ bounds,
+ child,
+ e.node->visibility,
+ e.node->time_from,
+ e.node->time_to,
+ num);
+}
+
+void BVH8::pack_unaligned_node(int idx,
+ const Transform *aligned_space,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num)
+{
+ float8 data[BVH_UNALIGNED_ONODE_SIZE];
+ memset(data, 0, sizeof(data));
+ data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
+ data[0].b = time_from;
+ data[0].c = time_to;
+
+ for(int i = 0; i < num; i++) {
+ Transform space = BVHUnaligned::compute_node_transform(
+ bounds[i],
+ aligned_space[i]);
+
+ data[1][i] = space.x.x;
+ data[2][i] = space.x.y;
+ data[3][i] = space.x.z;
+
+ data[4][i] = space.y.x;
+ data[5][i] = space.y.y;
+ data[6][i] = space.y.z;
+
+ data[7][i] = space.z.x;
+ data[8][i] = space.z.y;
+ data[9][i] = space.z.z;
+
+ data[10][i] = space.x.w;
+ data[11][i] = space.y.w;
+ data[12][i] = space.z.w;
+
+ data[13][i] = __int_as_float(child[i]);
+ }
+
+ for(int i = num; i < 8; i++) {
+ /* We store BB which would never be recorded as intersection
+ * so kernel might safely assume there are always 4 child nodes.
+ */
+
+ data[1][i] = 1.0f;
+ data[2][i] = 0.0f;
+ data[3][i] = 0.0f;
+
+ data[4][i] = 0.0f;
+ data[5][i] = 0.0f;
+ data[6][i] = 0.0f;
+
+ data[7][i] = 0.0f;
+ data[8][i] = 0.0f;
+ data[9][i] = 0.0f;
+
+ data[10][i] = -FLT_MAX;
+ data[11][i] = -FLT_MAX;
+ data[12][i] = -FLT_MAX;
+
+ data[13][i] = __int_as_float(0);
+ }
+
+ memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_ONODE_SIZE);
+}
+
+/* Quad SIMD Nodes */
+
+void BVH8::pack_nodes(const BVHNode *root)
+{
+ /* Calculate size of the arrays required. */
+ const size_t num_nodes = root->getSubtreeSize(BVH_STAT_ONODE_COUNT);
+ const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+ assert(num_leaf_nodes <= num_nodes);
+ const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+ size_t node_size;
+ if(params.use_unaligned_nodes) {
+ const size_t num_unaligned_nodes =
+ root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_ONODE_COUNT);
+ node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
+ (num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
+ }
+ else {
+ node_size = num_inner_nodes * BVH_ONODE_SIZE;
+ }
+ /* Resize arrays. */
+ pack.nodes.clear();
+ pack.leaf_nodes.clear();
+ /* For top level BVH, first merge existing BVH's so we know the offsets. */
+ if(params.top_level) {
+ pack_instances(node_size, num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
+ }
+ else {
+ pack.nodes.resize(node_size);
+ pack.leaf_nodes.resize(num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
+ }
+
+ int nextNodeIdx = 0, nextLeafNodeIdx = 0;
+
+ vector<BVHStackEntry> stack;
+ stack.reserve(BVHParams::MAX_DEPTH*2);
+ if(root->is_leaf()) {
+ stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
+ }
+ else {
+ stack.push_back(BVHStackEntry(root, nextNodeIdx));
+ nextNodeIdx += node_is_unaligned(root, bvh8)
+ ? BVH_UNALIGNED_ONODE_SIZE
+ : BVH_ONODE_SIZE;
+ }
+
+ while(stack.size()) {
+ BVHStackEntry e = stack.back();
+ stack.pop_back();
+
+ if(e.node->is_leaf()) {
+ /* leaf node */
+ const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
+ pack_leaf(e, leaf);
+ }
+ else {
+ /* Inner node. */
+ const BVHNode *node = e.node;
+ const BVHNode *node0 = node->get_child(0);
+ const BVHNode *node1 = node->get_child(1);
+ /* Collect nodes. */
+ const BVHNode *nodes[8];
+ int numnodes = 0;
+ if(node0->is_leaf()) {
+ nodes[numnodes++] = node0;
+ }
+ else {
+ const BVHNode *node00 = node0->get_child(0),
+ *node01 = node0->get_child(1);
+ if(node00->is_leaf()) {
+ nodes[numnodes++] = node00;
+ }
+ else {
+ nodes[numnodes++] = node00->get_child(0);
+ nodes[numnodes++] = node00->get_child(1);
+ }
+ if(node01->is_leaf()) {
+ nodes[numnodes++] = node01;
+ }
+ else {
+ nodes[numnodes++] = node01->get_child(0);
+ nodes[numnodes++] = node01->get_child(1);
+ }
+ }
+ if(node1->is_leaf()) {
+ nodes[numnodes++] = node1;
+ }
+ else {
+ const BVHNode *node10 = node1->get_child(0),
+ *node11 = node1->get_child(1);
+ if(node10->is_leaf()) {
+ nodes[numnodes++] = node10;
+ }
+ else {
+ nodes[numnodes++] = node10->get_child(0);
+ nodes[numnodes++] = node10->get_child(1);
+ }
+ if(node11->is_leaf()) {
+ nodes[numnodes++] = node11;
+ }
+ else {
+ nodes[numnodes++] = node11->get_child(0);
+ nodes[numnodes++] = node11->get_child(1);
+ }
+ }
+ /* Push entries on the stack. */
+ for(int i = 0; i < numnodes; ++i) {
+ int idx;
+ if(nodes[i]->is_leaf()) {
+ idx = nextLeafNodeIdx++;
+ }
+ else {
+ idx = nextNodeIdx;
+ nextNodeIdx += node_is_unaligned(nodes[i], bvh8)
+ ? BVH_UNALIGNED_ONODE_SIZE
+ : BVH_ONODE_SIZE;
+ }
+ stack.push_back(BVHStackEntry(nodes[i], idx));
+ }
+ /* Set node. */
+ pack_inner(e, &stack[stack.size() - numnodes], numnodes);
+ }
+ }
+ assert(node_size == nextNodeIdx);
+ /* Root index to start traversal at, to handle case of single leaf node. */
+ pack.root_index = (root->is_leaf()) ? -1 : 0;
+}
+
+void BVH8::refit_nodes()
+{
+ assert(!params.top_level);
+
+ BoundBox bbox = BoundBox::empty;
+ uint visibility = 0;
+ refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
+}
+
+void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
+{
+ if(leaf) {
+ int4 *data = &pack.leaf_nodes[idx];
+ int4 c = data[0];
+ /* Refit leaf node. */
+ for(int prim = c.x; prim < c.y; prim++) {
+ int pidx = pack.prim_index[prim];
+ int tob = pack.prim_object[prim];
+ Object *ob = objects[tob];
+
+ if(pidx == -1) {
+ /* Object instance. */
+ bbox.grow(ob->bounds);
+ }
+ else {
+ /* Primitives. */
+ const Mesh *mesh = ob->mesh;
+
+ if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
+ /* Curves. */
+ int str_offset = (params.top_level) ? mesh->curve_offset : 0;
+ Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+ int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
+
+ curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
+
+ visibility |= PATH_RAY_CURVE;
+
+ /* Motion curves. */
+ if(mesh->use_motion_blur) {
+ Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr) {
+ size_t mesh_size = mesh->curve_keys.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *key_steps = attr->data_float3();
+
+ for(size_t i = 0; i < steps; i++) {
+ curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
+ }
+ }
+ }
+ }
+ else {
+ /* Triangles. */
+ int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
+ Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+ const float3 *vpos = &mesh->verts[0];
+
+ triangle.bounds_grow(vpos, bbox);
+
+ /* Motion triangles. */
+ if(mesh->use_motion_blur) {
+ Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr) {
+ size_t mesh_size = mesh->verts.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *vert_steps = attr->data_float3();
+
+ for(size_t i = 0; i < steps; i++) {
+ triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
+ }
+ }
+ }
+ }
+ }
+
+ visibility |= ob->visibility;
+ }
+
+ float4 leaf_data[BVH_ONODE_LEAF_SIZE];
+ leaf_data[0].x = __int_as_float(c.x);
+ leaf_data[0].y = __int_as_float(c.y);
+ leaf_data[0].z = __uint_as_float(visibility);
+ leaf_data[0].w = __uint_as_float(c.w);
+ memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
+ }
+ else {
+ int4 *data = &pack.nodes[idx];
+ bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
+ int4 c;
+ if(is_unaligned) {
+ c = data[BVH_UNALIGNED_ONODE_SIZE-1];
+ }
+ else {
+ c = data[BVH_ONODE_SIZE-1];
+ }
+ /* Refit inner node, set bbox from children. */
+ BoundBox child_bbox[8] = { BoundBox::empty, BoundBox::empty,
+ BoundBox::empty, BoundBox::empty,
+ BoundBox::empty, BoundBox::empty,
+ BoundBox::empty, BoundBox::empty };
+ uint child_visibility[8] = { 0 };
+ int num_nodes = 0;
+
+ for(int i = 0; i < 8; ++i) {
+ if(c[i] != 0) {
+ refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
+ child_bbox[i], child_visibility[i]);
+ ++num_nodes;
+ bbox.grow(child_bbox[i]);
+ visibility |= child_visibility[i];
+ }
+ }
+
+ if(is_unaligned) {
+ Transform aligned_space[8] = { transform_identity(), transform_identity(),
+ transform_identity(), transform_identity(),
+ transform_identity(), transform_identity(),
+ transform_identity(), transform_identity()};
+ pack_unaligned_node(idx,
+ aligned_space,
+ child_bbox,
+ &c[0],
+ visibility,
+ 0.0f,
+ 1.0f,
+ num_nodes);
+ }
+ else {
+ pack_aligned_node(idx,
+ child_bbox,
+ &c[0],
+ visibility,
+ 0.0f,
+ 1.0f,
+ num_nodes);
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
new file mode 100644
index 00000000000..99ff79e60b5
--- /dev/null
+++ b/intern/cycles/bvh/bvh8.h
@@ -0,0 +1,97 @@
+/*
+Copyright (c) 2017, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __BVH8_H__
+#define __BVH8_H__
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_params.h"
+
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVHNode;
+struct BVHStackEntry;
+class BVHParams;
+class BoundBox;
+class LeafNode;
+class Object;
+class Progress;
+
+#define BVH_ONODE_SIZE 16
+#define BVH_ONODE_LEAF_SIZE 1
+#define BVH_UNALIGNED_ONODE_SIZE 28
+
+/* BVH8
+*
+* Octo BVH, with each node having eight children, to use with SIMD instructions.
+*/
+class BVH8 : public BVH {
+protected:
+ /* constructor */
+ friend class BVH;
+ BVH8(const BVHParams& params, const vector<Object*>& objects);
+
+ /* pack */
+ void pack_nodes(const BVHNode *root);
+
+ void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
+ void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
+
+ void pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num);
+ void pack_aligned_node(int idx,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num);
+
+ void pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num);
+ void pack_unaligned_node(int idx,
+ const Transform *aligned_space,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num);
+
+ /* refit */
+ void refit_nodes();
+ void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BVH8_H__ */
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 879d07b9625..fd8b4977699 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -61,6 +61,55 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
}
}
return cnt;
+ case BVH_STAT_ONODE_COUNT:
+ cnt = 1;
+ for(int i = 0; i < num_children(); i++) {
+ BVHNode *node = get_child(i);
+ if(node->is_leaf()) {
+ cnt += 1;
+ }
+ else {
+ for(int j = 0; j < node->num_children(); j++)
+ {
+ BVHNode *node_next = node->get_child(j);
+ if(node_next->is_leaf()) {
+ cnt += 1;
+ }
+ else {
+ for(int k = 0; k < node_next->num_children(); k++) {
+ cnt += node_next->get_child(k)->getSubtreeSize(stat);
+ }
+ }
+ }
+ }
+ }
+ return cnt;
+ case BVH_STAT_UNALIGNED_INNER_ONODE_COUNT:
+ {
+ bool has_unaligned = false;
+ for(int i = 0; i < num_children(); i++) {
+ BVHNode *node = get_child(i);
+ if(node->is_leaf()) {
+ has_unaligned |= node->is_unaligned;
+ }
+ else {
+ for(int j = 0; j < node->num_children(); j++) {
+ BVHNode *node_next = node->get_child(j);
+ if(node_next->is_leaf()) {
+ has_unaligned |= node_next->is_unaligned;
+ }
+ else {
+ for(int k = 0; k < node_next->num_children(); k++) {
+ cnt += node_next->get_child(k)->getSubtreeSize(stat);
+ has_unaligned |= node_next->get_child(k)->is_unaligned;
+ }
+ }
+ }
+ }
+ }
+ cnt += has_unaligned? 1: 0;
+ }
+ return cnt;
case BVH_STAT_ALIGNED_COUNT:
if(!is_unaligned) {
cnt = 1;
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index 94cf5ab730c..ed89d52a50a 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -39,6 +39,8 @@ enum BVH_STAT {
BVH_STAT_ALIGNED_LEAF_COUNT,
BVH_STAT_UNALIGNED_LEAF_COUNT,
BVH_STAT_DEPTH,
+ BVH_STAT_ONODE_COUNT,
+ BVH_STAT_UNALIGNED_INNER_ONODE_COUNT,
};
class BVHParams;