/*
 * Adapted from code copyright 2009-2010 NVIDIA Corporation
 * Modifications Copyright 2011, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "bvh/build.h"

#include "bvh/binning.h"
#include "bvh/node.h"
#include "bvh/params.h"
#include "bvh/split.h"

#include "scene/curves.h"
#include "scene/hair.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"

#include "util/algorithm.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/queue.h"
#include "util/simd.h"
#include "util/stack_allocator.h"
#include "util/time.h"

CCL_NAMESPACE_BEGIN

/* Constructor / Destructor */

BVHBuild::BVHBuild(const vector<Object *> &objects_,
                   array<int> &prim_type_,
                   array<int> &prim_index_,
                   array<int> &prim_object_,
                   array<float2> &prim_time_,
                   const BVHParams &params_,
                   Progress &progress_)
    : objects(objects_),
      prim_type(prim_type_),
      prim_index(prim_index_),
      prim_object(prim_object_),
      prim_time(prim_time_),
      params(params_),
      progress(progress_),
      progress_start_time(0.0),
      unaligned_heuristic(objects_)
{
  spatial_min_overlap = 0.0f;
}

BVHBuild::~BVHBuild()
{
}

/* Adding References */

void BVHBuild::add_reference_triangles(BoundBox &root,
                                       BoundBox &center,
                                       Mesh *mesh,
                                       int object_index)
{
  const PrimitiveType primitive_type = mesh->primitive_type();
  const Attribute *attr_mP = NULL;
  if (mesh->has_motion_blur()) {
    attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  }
  const size_t num_triangles = mesh->num_triangles();
  for (uint j = 0; j < num_triangles; j++) {
    Mesh::Triangle t = mesh->get_triangle(j);
    const float3 *verts = &mesh->verts[0];
    if (attr_mP == NULL) {
      BoundBox bounds = BoundBox::empty;
      t.bounds_grow(verts, bounds);
      if (bounds.valid() && t.valid(verts)) {
        references.push_back(BVHReference(bounds, j, object_index, primitive_type));
        root.grow(bounds);
        center.grow(bounds.center2());
      }
    }
    else if (params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
      /* Motion triangles, simple case: single node for the whole
       * primitive. Lowest memory footprint and faster BVH build but
       * least optimal ray-tracing. */
      /* TODO(sergey): Support motion steps for spatially split BVH. */
      const size_t num_verts = mesh->verts.size();
      const size_t num_steps = mesh->motion_steps;
      const float3 *vert_steps = attr_mP->data_float3();
      BoundBox bounds = BoundBox::empty;
      t.bounds_grow(verts, bounds);
      for (size_t step = 0; step < num_steps - 1; step++) {
        t.bounds_grow(vert_steps + step * num_verts, bounds);
      }
      if (bounds.valid()) {
        references.push_back(BVHReference(bounds, j, object_index, primitive_type));
        root.grow(bounds);
        center.grow(bounds.center2());
      }
    }
    else {
      /* Motion triangles, trace optimized case: we split triangle
       * primitives into separate nodes for each of the time steps.
       * This way we minimize overlap of neighbor triangle primitives. */
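      /* For example, with params.num_motion_triangle_steps = 2 the code
       * below samples num_bvh_steps = 5 times (0.0, 0.25, 0.5, 0.75 and
       * 1.0) and emits four references per triangle, each bounding the
       * primitive over one sub-interval [prev_time, curr_time] of the
       * shutter. */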
      const int num_bvh_steps = params.num_motion_triangle_steps * 2 + 1;
      const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
      const size_t num_verts = mesh->verts.size();
      const size_t num_steps = mesh->motion_steps;
      const float3 *vert_steps = attr_mP->data_float3();
      /* Calculate bounding box of the previous time step.
       * Will be reused later to avoid duplicated work on
       * calculating BVH time step boundbox. */
      float3 prev_verts[3];
      t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts);
      BoundBox prev_bounds = BoundBox::empty;
      prev_bounds.grow(prev_verts[0]);
      prev_bounds.grow(prev_verts[1]);
      prev_bounds.grow(prev_verts[2]);
      /* Create all primitive time steps. */
      for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
        const float curr_time = (float)bvh_step * num_bvh_steps_inv_1;
        float3 curr_verts[3];
        t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts);
        BoundBox curr_bounds = BoundBox::empty;
        curr_bounds.grow(curr_verts[0]);
        curr_bounds.grow(curr_verts[1]);
        curr_bounds.grow(curr_verts[2]);
        BoundBox bounds = prev_bounds;
        bounds.grow(curr_bounds);
        if (bounds.valid()) {
          const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
          references.push_back(
              BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time));
          root.grow(bounds);
          center.grow(bounds.center2());
        }
        /* Current time boundbox becomes previous one for the
         * next time step. */
        prev_bounds = curr_bounds;
      }
    }
  }
}

void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int object_index)
{
  const Attribute *curve_attr_mP = NULL;
  if (hair->has_motion_blur()) {
    curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  }

  const PrimitiveType primitive_type = hair->primitive_type();

  const size_t num_curves = hair->num_curves();
  for (uint j = 0; j < num_curves; j++) {
    const Hair::Curve curve = hair->get_curve(j);
    const float *curve_radius = &hair->get_curve_radius()[0];
    for (int k = 0; k < curve.num_keys - 1; k++) {
      if (curve_attr_mP == NULL) {
        /* Really simple logic for static hair. */
        BoundBox bounds = BoundBox::empty;
        curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
        if (bounds.valid()) {
          int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
          references.push_back(BVHReference(bounds, j, object_index, packed_type));
          root.grow(bounds);
          center.grow(bounds.center2());
        }
      }
      else if (params.num_motion_curve_steps == 0 || params.use_spatial_split) {
        /* Simple case of motion curves: single node for the whole
         * shutter time. Lowest memory usage but less optimal
         * rendering. */
        /* TODO(sergey): Support motion steps for spatially split BVH. */
        BoundBox bounds = BoundBox::empty;
        curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
        const size_t num_keys = hair->get_curve_keys().size();
        const size_t num_steps = hair->get_motion_steps();
        const float3 *key_steps = curve_attr_mP->data_float3();
        for (size_t step = 0; step < num_steps - 1; step++) {
          curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
        }
        if (bounds.valid()) {
          int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
          references.push_back(BVHReference(bounds, j, object_index, packed_type));
          root.grow(bounds);
          center.grow(bounds.center2());
        }
      }
      else {
        /* Motion curves, trace optimized case: we split curve key
         * primitives into separate nodes for each of the time steps.
         * This way we minimize overlap of neighbor curve primitives. */
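        /* Each sampled time evaluates the four Catmull-Rom control keys
         * k - 1, k, k + 1, k + 2 of this segment (cardinal_motion_keys
         * presumably clamps these indices at the curve ends), then the
         * bounds of neighboring time samples are merged, mirroring the
         * triangle case above. */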
        const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
        const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
        const size_t num_steps = hair->get_motion_steps();
        const float3 *curve_keys = &hair->get_curve_keys()[0];
        const float3 *key_steps = curve_attr_mP->data_float3();
        const size_t num_keys = hair->get_curve_keys().size();
        /* Calculate bounding box of the previous time step.
         * Will be reused later to avoid duplicated work on
         * calculating BVH time step boundbox. */
        float4 prev_keys[4];
        curve.cardinal_motion_keys(curve_keys,
                                   curve_radius,
                                   key_steps,
                                   num_keys,
                                   num_steps,
                                   0.0f,
                                   k - 1,
                                   k,
                                   k + 1,
                                   k + 2,
                                   prev_keys);
        BoundBox prev_bounds = BoundBox::empty;
        curve.bounds_grow(prev_keys, prev_bounds);
        /* Create all primitive time steps. */
        for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
          const float curr_time = (float)bvh_step * num_bvh_steps_inv_1;
          float4 curr_keys[4];
          curve.cardinal_motion_keys(curve_keys,
                                     curve_radius,
                                     key_steps,
                                     num_keys,
                                     num_steps,
                                     curr_time,
                                     k - 1,
                                     k,
                                     k + 1,
                                     k + 2,
                                     curr_keys);
          BoundBox curr_bounds = BoundBox::empty;
          curve.bounds_grow(curr_keys, curr_bounds);
          BoundBox bounds = prev_bounds;
          bounds.grow(curr_bounds);
          if (bounds.valid()) {
            const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
            int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
            references.push_back(
                BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time));
            root.grow(bounds);
            center.grow(bounds.center2());
          }
          /* Current time boundbox becomes previous one for the
           * next time step. */
          prev_bounds = curr_bounds;
        }
      }
    }
  }
}

void BVHBuild::add_reference_points(BoundBox &root,
                                    BoundBox &center,
                                    PointCloud *pointcloud,
                                    int i)
{
  const Attribute *point_attr_mP = NULL;
  if (pointcloud->has_motion_blur()) {
    point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  }

  const float3 *points_data = &pointcloud->points[0];
  const float *radius_data = &pointcloud->radius[0];
  const size_t num_points = pointcloud->num_points();
  const float3 *motion_data = (point_attr_mP) ? point_attr_mP->data_float3() : NULL;
  const size_t num_steps = pointcloud->get_motion_steps();

  if (point_attr_mP == NULL) {
    /* Really simple logic for static points. */
    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      BoundBox bounds = BoundBox::empty;
      point.bounds_grow(points_data, radius_data, bounds);
      if (bounds.valid()) {
        references.push_back(BVHReference(bounds, j, i, PRIMITIVE_POINT));
        root.grow(bounds);
        center.grow(bounds.center2());
      }
    }
  }
  else if (params.num_motion_point_steps == 0 || params.use_spatial_split) {
    /* Simple case of motion points: single node for the whole
     * shutter time. Lowest memory usage but less optimal
     * rendering. */
    /* TODO(sergey): Support motion steps for spatially split BVH. */
    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      BoundBox bounds = BoundBox::empty;
      point.bounds_grow(points_data, radius_data, bounds);
      for (size_t step = 0; step < num_steps - 1; step++) {
        point.bounds_grow(motion_data + step * num_points, radius_data, bounds);
      }
      if (bounds.valid()) {
        references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT));
        root.grow(bounds);
        center.grow(bounds.center2());
      }
    }
  }
  else {
    /* Motion points, trace optimized case: we split point
     * primitives into separate nodes for each of the time steps.
     * This way we minimize overlap of neighbor point primitives. */
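    /* As with triangles and curves, each reference below carries its
     * [prev_time, curr_time] interval, which allows traversal to skip
     * subtrees whose stored time range does not contain the ray time. */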
    const int num_bvh_steps = params.num_motion_point_steps * 2 + 1;
    const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);

    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      const size_t num_steps = pointcloud->get_motion_steps();
      const float3 *point_steps = point_attr_mP->data_float3();

      /* Calculate bounding box of the previous time step.
       * Will be reused later to avoid duplicated work on
       * calculating BVH time step boundbox. */
      float4 prev_key = point.motion_key(
          points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
      BoundBox prev_bounds = BoundBox::empty;
      point.bounds_grow(prev_key, prev_bounds);
      /* Create all primitive time steps. */
      for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
        const float curr_time = (float)bvh_step * num_bvh_steps_inv_1;
        float4 curr_key = point.motion_key(
            points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
        BoundBox curr_bounds = BoundBox::empty;
        point.bounds_grow(curr_key, curr_bounds);
        BoundBox bounds = prev_bounds;
        bounds.grow(curr_bounds);
        if (bounds.valid()) {
          const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
          references.push_back(
              BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT, prev_time, curr_time));
          root.grow(bounds);
          center.grow(bounds.center2());
        }
        /* Current time boundbox becomes previous one for the
         * next time step. */
        prev_bounds = curr_bounds;
      }
    }
  }
}

void BVHBuild::add_reference_geometry(BoundBox &root,
                                      BoundBox &center,
                                      Geometry *geom,
                                      int object_index)
{
  if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
    Mesh *mesh = static_cast<Mesh *>(geom);
    add_reference_triangles(root, center, mesh, object_index);
  }
  else if (geom->geometry_type == Geometry::HAIR) {
    Hair *hair = static_cast<Hair *>(geom);
    add_reference_curves(root, center, hair, object_index);
  }
  else if (geom->geometry_type == Geometry::POINTCLOUD) {
    PointCloud *pointcloud = static_cast<PointCloud *>(geom);
    add_reference_points(root, center, pointcloud, object_index);
  }
}

void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i)
{
  references.push_back(BVHReference(ob->bounds, -1, i, 0));
  root.grow(ob->bounds);
  center.grow(ob->bounds.center2());
}

static size_t count_curve_segments(Hair *hair)
{
  size_t num = 0, num_curves = hair->num_curves();

  for (size_t i = 0; i < num_curves; i++)
    num += hair->get_curve(i).num_keys - 1;

  return num;
}

static size_t count_primitives(Geometry *geom)
{
  if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
    Mesh *mesh = static_cast<Mesh *>(geom);
    return mesh->num_triangles();
  }
  else if (geom->geometry_type == Geometry::HAIR) {
    Hair *hair = static_cast<Hair *>(geom);
    return count_curve_segments(hair);
  }
  else if (geom->geometry_type == Geometry::POINTCLOUD) {
    PointCloud *pointcloud = static_cast<PointCloud *>(geom);
    return pointcloud->num_points();
  }

  return 0;
}

void BVHBuild::add_references(BVHRange &root)
{
  /* Reserve space for references. */
  size_t num_alloc_references = 0;
  foreach (Object *ob, objects) {
    if (params.top_level) {
      if (!ob->is_traceable()) {
        continue;
      }
      if (!ob->get_geometry()->is_instanced()) {
        num_alloc_references += count_primitives(ob->get_geometry());
      }
      else {
        num_alloc_references++;
      }
    }
    else {
      num_alloc_references += count_primitives(ob->get_geometry());
    }
  }

  references.reserve(num_alloc_references);

  /* Add references from objects. */
  BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
  int i = 0;
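  /* NOTE: The object index i has to stay in sync with the position of
   * the object in the objects array, so it is still incremented below
   * for non-traceable objects that contribute no references. */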
  foreach (Object *ob, objects) {
    if (params.top_level) {
      if (!ob->is_traceable()) {
        ++i;
        continue;
      }
      if (!ob->get_geometry()->is_instanced())
        add_reference_geometry(bounds, center, ob->get_geometry(), i);
      else
        add_reference_object(bounds, center, ob, i);
    }
    else
      add_reference_geometry(bounds, center, ob->get_geometry(), i);

    i++;

    if (progress.get_cancel())
      return;
  }

  /* Happens mostly on empty meshes. */
  if (!bounds.valid())
    bounds.grow(zero_float3());

  root = BVHRange(bounds, center, 0, references.size());
}

/* Build */

BVHNode *BVHBuild::run()
{
  BVHRange root;

  /* Add references. */
  add_references(root);

  if (progress.get_cancel())
    return NULL;

  /* Init spatial splits. */
  if (params.top_level) {
    /* NOTE: Technically it is supported by the builder but it's not really
     * optimized for speed yet and not really clear yet if it has measurable
     * improvement on render time. Needs some extra investigation before
     * enabling spatial split for top level BVH. */
    params.use_spatial_split = false;
  }

  spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
  spatial_free_index = 0;

  need_prim_time = params.use_motion_steps();

  /* Init progress updates. */
  double build_start_time;
  build_start_time = progress_start_time = time_dt();
  progress_count = 0;
  progress_total = references.size();
  progress_original_total = progress_total;

  prim_type.resize(references.size());
  prim_index.resize(references.size());
  prim_object.resize(references.size());
  if (need_prim_time) {
    prim_time.resize(references.size());
  }
  else {
    prim_time.resize(0);
  }
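  /* Two build paths follow: the spatial split builder may duplicate
   * references across children for tighter bounds at the cost of extra
   * memory, while the binning builder keeps exactly one reference per
   * primitive and partitions the references array in place. Both
   * recurse locally for small ranges and push larger subtrees onto the
   * task pool. */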
  /* Build recursively. */
  BVHNode *rootnode;
  if (params.use_spatial_split) {
    /* Perform multithreaded spatial split build. */
    BVHSpatialStorage *local_storage = &spatial_storage.local();
    rootnode = build_node(root, references, 0, local_storage);
    task_pool.wait_work();
  }
  else {
    /* Perform multithreaded binning build. */
    BVHObjectBinning rootbin(root, (references.size()) ? &references[0] : NULL);
    rootnode = build_node(rootbin, 0);
    task_pool.wait_work();
  }

  /* Clean up temporary memory usage by threads. */
  spatial_storage.clear();

  /* Delete if we canceled. */
  if (rootnode) {
    if (progress.get_cancel()) {
      rootnode->deleteSubtree();
      rootnode = NULL;
      VLOG(1) << "BVH build cancelled.";
    }
    else {
      /*rotate(rootnode, 4, 5);*/
      rootnode->update_visibility();
      rootnode->update_time();
    }
    if (rootnode != NULL) {
      VLOG(1) << "BVH build statistics:\n"
              << "  Build time: " << time_dt() - build_start_time << "\n"
              << "  Total number of nodes: "
              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT))
              << "\n"
              << "  Number of inner nodes: "
              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT))
              << "\n"
              << "  Number of leaf nodes: "
              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT))
              << "\n"
              << "  Number of unaligned nodes: "
              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))
              << "\n"
              << "  Allocation slop factor: "
              << ((prim_type.capacity() != 0) ? (float)prim_type.size() / prim_type.capacity() :
                                                1.0f)
              << "\n"
              << "  Maximum depth: "
              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH)) << "\n";
    }
  }

  return rootnode;
}

void BVHBuild::progress_update()
{
  if (time_dt() - progress_start_time < 0.25)
    return;

  double progress_start = (double)progress_count / (double)progress_total;
  double duplicates = (double)(progress_total - progress_original_total) / (double)progress_total;

  string msg = string_printf(
      "Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0);

  progress.set_substatus(msg);
  progress_start_time = time_dt();
}

void BVHBuild::thread_build_node(InnerNode *inner,
                                 int child,
                                 const BVHObjectBinning &range,
                                 int level)
{
  if (progress.get_cancel())
    return;

  /* Build nodes. */
  BVHNode *node = build_node(range, level);

  /* Set child in inner node. */
  inner->children[child] = node;

  /* Update progress. */
  if (range.size() < THREAD_TASK_SIZE) {
    /*rotate(node, INT_MAX, 5);*/

    thread_scoped_lock lock(build_mutex);

    progress_count += range.size();
    progress_update();
  }
}

void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
                                               int child,
                                               const BVHRange &range,
                                               vector<BVHReference> &references,
                                               int level)
{
  if (progress.get_cancel()) {
    return;
  }

  /* Get per-thread memory for spatial split. */
  BVHSpatialStorage *local_storage = &spatial_storage.local();

  /* Build nodes. */
  BVHNode *node = build_node(range, references, level, local_storage);

  /* Set child in inner node. */
  inner->children[child] = node;
}

bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
                                          const vector<BVHReference> &references) const
{
  size_t size = range.size();
  size_t max_leaf_size = max(max(params.max_triangle_leaf_size, params.max_curve_leaf_size),
                             params.max_point_leaf_size);

  if (size > max_leaf_size)
    return false;

  size_t num_triangles = 0;
  size_t num_motion_triangles = 0;
  size_t num_curves = 0;
  size_t num_motion_curves = 0;
  size_t num_points = 0;
  size_t num_motion_points = 0;

  for (int i = 0; i < size; i++) {
    const BVHReference &ref = references[range.start() + i];

    if (ref.prim_type() & PRIMITIVE_CURVE) {
      if (ref.prim_type() & PRIMITIVE_MOTION) {
        num_motion_curves++;
      }
      else {
        num_curves++;
      }
    }
    else if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
      if (ref.prim_type() & PRIMITIVE_MOTION) {
        num_motion_triangles++;
      }
      else {
        num_triangles++;
      }
    }
    else if (ref.prim_type() & PRIMITIVE_POINT) {
      if (ref.prim_type() & PRIMITIVE_MOTION) {
        num_motion_points++;
      }
      else {
        num_points++;
      }
    }
  }

  return (num_triangles <= params.max_triangle_leaf_size) &&
         (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
         (num_curves <= params.max_curve_leaf_size) &&
         (num_motion_curves <= params.max_motion_curve_leaf_size) &&
         (num_points <= params.max_point_leaf_size) &&
         (num_motion_points <= params.max_motion_point_leaf_size);
}

/* Multithreaded binning builder. */

BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
{
  size_t size = range.size();
  float leafSAH = params.sah_primitive_cost * range.leafSAH;
  float splitSAH = params.sah_node_cost * range.bounds().half_area() +
                   params.sah_primitive_cost * range.splitSAH;

  /* Have at least one inner node on top level, for performance and correct
   * visibility tests, since object instances do not check visibility flag. */
  if (!(range.size() > 0 && params.top_level && level == 0)) {
    /* Make leaf node when threshold reached or SAH tells us. */
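    /* Rough SAH reasoning here: leafSAH estimates the cost of
     * intersecting all primitives in a single leaf, while splitSAH adds
     * a node traversal cost to the binner's best partition cost (child
     * surface areas weighted by primitive counts). A leaf is only made
     * when it is the cheaper option and range_within_max_leaf_size()
     * confirms the per-type leaf limits. */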
    if ((params.small_enough_for_leaf(size, level)) ||
        (range_within_max_leaf_size(range, references) && leafSAH < splitSAH)) {
      return create_leaf_node(range, references);
    }
  }

  BVHObjectBinning unaligned_range;
  float unalignedSplitSAH = FLT_MAX;
  float unalignedLeafSAH = FLT_MAX;
  Transform aligned_space;
  bool do_unaligned_split = false;
  if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references[0]);
    unaligned_range = BVHObjectBinning(range, &references[0], &unaligned_heuristic, &aligned_space);
    unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
                        params.sah_primitive_cost * unaligned_range.splitSAH;
    unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
    if (!(range.size() > 0 && params.top_level && level == 0)) {
      if (unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
          range_within_max_leaf_size(range, references)) {
        return create_leaf_node(range, references);
      }
    }
    /* Check whether unaligned split is better than the regular one. */
    if (unalignedSplitSAH < splitSAH) {
      do_unaligned_split = true;
    }
  }

  /* Perform split. */
  BVHObjectBinning left, right;
  if (do_unaligned_split) {
    unaligned_range.split(&references[0], left, right);
  }
  else {
    range.split(&references[0], left, right);
  }

  BoundBox bounds;
  if (do_unaligned_split) {
    bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references[0], aligned_space);
  }
  else {
    bounds = range.bounds();
  }

  /* Create inner node. */
  InnerNode *inner;
  if (range.size() < THREAD_TASK_SIZE) {
    /* Local build. */
    BVHNode *leftnode = build_node(left, level + 1);
    BVHNode *rightnode = build_node(right, level + 1);

    inner = new InnerNode(bounds, leftnode, rightnode);
  }
  else {
    /* Threaded build. */
    inner = new InnerNode(bounds);

    task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); });
    task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); });
  }

  if (do_unaligned_split) {
    inner->set_aligned_space(aligned_space);
  }

  return inner;
}

/* Multithreaded spatial split builder. */

BVHNode *BVHBuild::build_node(const BVHRange &range,
                              vector<BVHReference> &references,
                              int level,
                              BVHSpatialStorage *storage)
{
  /* Update progress.
   *
   * TODO(sergey): Currently it matches old behavior, but we can move it to the
   * task thread (which will mimic the non-split builder) and save some CPU
   * ticks on checking cancel status. */
  progress_update();
  if (progress.get_cancel()) {
    return NULL;
  }

  /* Small enough or too deep => create leaf. */
  if (!(range.size() > 0 && params.top_level && level == 0)) {
    if (params.small_enough_for_leaf(range.size(), level)) {
      progress_count += range.size();
      return create_leaf_node(range, references);
    }
  }

  /* Perform splitting test. */
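  /* BVHMixedSplit evaluates an object split and, when the overlap of
   * the candidate children exceeds spatial_min_overlap, also a spatial
   * split that clips references against a splitting plane. A clipped
   * reference may end up in both children, which is why progress_total
   * is adjusted after the split below. */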
  BVHMixedSplit split(this, storage, range, references, level);

  if (!(range.size() > 0 && params.top_level && level == 0)) {
    if (split.no_split) {
      progress_count += range.size();
      return create_leaf_node(range, references);
    }
  }
  float leafSAH = params.sah_primitive_cost * split.leafSAH;
  float splitSAH = params.sah_node_cost * range.bounds().half_area() +
                   params.sah_primitive_cost * split.nodeSAH;

  BVHMixedSplit unaligned_split;
  float unalignedSplitSAH = FLT_MAX;
  /* float unalignedLeafSAH = FLT_MAX; */
  Transform aligned_space;
  bool do_unaligned_split = false;
  if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0));
    unaligned_split = BVHMixedSplit(
        this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
    /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
    unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
                        params.sah_primitive_cost * unaligned_split.nodeSAH;
    /* TODO(sergey): Check whether we can create a leaf already. */
    /* Check whether unaligned split is better than the regular one. */
    if (unalignedSplitSAH < splitSAH) {
      do_unaligned_split = true;
    }
  }

  /* Do split. */
  BVHRange left, right;
  if (do_unaligned_split) {
    unaligned_split.split(this, left, right, range);
  }
  else {
    split.split(this, left, right, range);
  }

  progress_total += left.size() + right.size() - range.size();

  BoundBox bounds;
  if (do_unaligned_split) {
    bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space);
  }
  else {
    bounds = range.bounds();
  }

  /* Create inner node. */
  InnerNode *inner;
  if (range.size() < THREAD_TASK_SIZE) {
    /* Local build. */

    /* Build left node. */
    vector<BVHReference> right_references(references.begin() + right.start(),
                                          references.begin() + right.end());
    right.set_start(0);

    BVHNode *leftnode = build_node(left, references, level + 1, storage);

    /* Build right node. */
    BVHNode *rightnode = build_node(right, right_references, level + 1, storage);

    inner = new InnerNode(bounds, leftnode, rightnode);
  }
  else {
    /* Threaded build. */
    inner = new InnerNode(bounds);

    vector<BVHReference> left_references(references.begin() + left.start(),
                                         references.begin() + left.end());
    vector<BVHReference> right_references(references.begin() + right.start(),
                                          references.begin() + right.end());
    right.set_start(0);

    /* Create tasks for left and right nodes, using copy for most arguments and
     * move for references to avoid memory copies. */
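    /* Each references vector handed to a child is based at its own
     * range, so the left range already starts at index 0 of its copy;
     * only the right range has to be rebased onto right_references. */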
    task_pool.push([=, refs = std::move(left_references)]() mutable {
      thread_build_spatial_split_node(inner, 0, left, refs, level + 1);
    });
    task_pool.push([=, refs = std::move(right_references)]() mutable {
      thread_build_spatial_split_node(inner, 1, right, refs, level + 1);
    });
  }

  if (do_unaligned_split) {
    inner->set_aligned_space(aligned_space);
  }

  return inner;
}

/* Create Nodes */

BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
{
  if (num == 0) {
    BoundBox bounds = BoundBox::empty;
    return new LeafNode(bounds, 0, 0, 0);
  }
  else if (num == 1) {
    assert(start < prim_type.size());
    prim_type[start] = ref->prim_type();
    prim_index[start] = ref->prim_index();
    prim_object[start] = ref->prim_object();
    if (need_prim_time) {
      prim_time[start] = make_float2(ref->time_from(), ref->time_to());
    }

    const uint visibility = objects[ref->prim_object()]->visibility_for_tracing();
    BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start + 1);
    leaf_node->time_from = ref->time_from();
    leaf_node->time_to = ref->time_to();
    return leaf_node;
  }
  else {
    int mid = num / 2;
    BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
    BVHNode *leaf1 = create_object_leaf_nodes(ref + mid, start + mid, num - mid);

    BoundBox bounds = BoundBox::empty;
    bounds.grow(leaf0->bounds);
    bounds.grow(leaf1->bounds);

    BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
    inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
    inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
    return inner_node;
  }
}

BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHReference> &references)
{
  /* This is a bit over-allocating here (taking the actual leaf size into
   * account would allow smaller storage), but chunk-based re-allocation
   * in vector makes it difficult to use small stack storage here. Some
   * tweaks are possible, though.
   *
   * NOTES:
   *  - If the size is too big, we'll have inefficient stack usage,
   *    and lots of cache misses.
   *  - If the size is too small, then we can run out of memory
   *    allowed to be used by vector.
   *    In practice it wouldn't mean a crash; the allocator will just
   *    fall back to the heap, which is slower.
   *  - Optimistic re-allocation in STL could jump us out of stack usage
   *    because re-allocation happens in chunks and we cannot control the
   *    size of those chunks. */
  typedef StackAllocator<256, int> LeafStackAllocator;
  typedef StackAllocator<256, float2> LeafTimeStackAllocator;
  typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;

  vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM];
  vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM];
  vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM];
  vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM];
  vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM];

  /* TODO(sergey): In theory we should be able to store references. */
  vector<BVHReference, LeafReferenceStackAllocator> object_references;

  uint visibility[PRIMITIVE_NUM] = {0};
  /* NOTE: Keep initialization in sync with actual number of primitives. */
  BoundBox bounds[PRIMITIVE_NUM] = {
      BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};

  int ob_num = 0;
  int num_new_prims = 0;
  /* Fill in per-type type/index array. */
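  /* References are bucketed by primitive type first: leaves are created
   * per type, so a range mixing e.g. triangles and curve segments
   * yields several sibling leaves rather than one heterogeneous leaf. */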
  for (int i = 0; i < range.size(); i++) {
    const BVHReference &ref = references[range.start() + i];
    if (ref.prim_index() != -1) {
      uint32_t type_index = PRIMITIVE_INDEX(ref.prim_type() & PRIMITIVE_ALL);
      p_ref[type_index].push_back(ref);
      p_type[type_index].push_back(ref.prim_type());
      p_index[type_index].push_back(ref.prim_index());
      p_object[type_index].push_back(ref.prim_object());
      p_time[type_index].push_back(make_float2(ref.time_from(), ref.time_to()));

      bounds[type_index].grow(ref.bounds());
      visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
      ++num_new_prims;
    }
    else {
      object_references.push_back(ref);
      ++ob_num;
    }
  }

  /* Create leaf nodes for every existing primitive.
   *
   * Here we write primitive types, indices and objects to a temporary array.
   * This way we keep all the heavy memory allocation code outside of the
   * thread lock in the case of spatial split building.
   *
   * TODO(sergey): With some pointer trickery we can write directly to the
   * destination buffers for the non-spatial split BVH. */
  BVHNode *leaves[PRIMITIVE_NUM + 1] = {NULL};
  int num_leaves = 0;
  size_t start_index = 0;
  vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object;
  vector<float2, LeafTimeStackAllocator> local_prim_time;
  local_prim_type.resize(num_new_prims);
  local_prim_index.resize(num_new_prims);
  local_prim_object.resize(num_new_prims);
  if (need_prim_time) {
    local_prim_time.resize(num_new_prims);
  }
  for (int i = 0; i < PRIMITIVE_NUM; ++i) {
    int num = (int)p_type[i].size();
    if (num != 0) {
      assert(p_type[i].size() == p_index[i].size());
      assert(p_type[i].size() == p_object[i].size());
      Transform aligned_space;
      bool alignment_found = false;
      for (int j = 0; j < num; ++j) {
        const int index = start_index + j;
        local_prim_type[index] = p_type[i][j];
        local_prim_index[index] = p_index[i][j];
        local_prim_object[index] = p_object[i][j];
        if (need_prim_time) {
          local_prim_time[index] = p_time[i][j];
        }
        if (params.use_unaligned_nodes && !alignment_found) {
          alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j],
                                                                      &aligned_space);
        }
      }
      LeafNode *leaf_node = new LeafNode(bounds[i], visibility[i], start_index, start_index + num);
      /* Compute the leaf's time range from its references. */
      {
        float time_from = 1.0f, time_to = 0.0f;
        for (int j = 0; j < num; ++j) {
          const BVHReference &ref = p_ref[i][j];
          time_from = min(time_from, ref.time_from());
          time_to = max(time_to, ref.time_to());
        }
        leaf_node->time_from = time_from;
        leaf_node->time_to = time_to;
      }
      if (alignment_found) {
        /* Need to recalculate leaf bounds with new alignment. */
        leaf_node->bounds = BoundBox::empty;
        for (int j = 0; j < num; ++j) {
          const BVHReference &ref = p_ref[i][j];
          BoundBox ref_bounds = unaligned_heuristic.compute_aligned_prim_boundbox(ref,
                                                                                  aligned_space);
          leaf_node->bounds.grow(ref_bounds);
        }
        /* Set alignment space. */
        leaf_node->set_aligned_space(aligned_space);
      }
      leaves[num_leaves++] = leaf_node;
      start_index += num;
    }
  }
  /* Get size of new data to be copied to the packed arrays. */
  const int num_new_leaf_data = start_index;
  const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
  /* Copy actual data to the packed array. */
  if (params.use_spatial_split) {
    spatial_spin_lock.lock();
    /* We use the first free index in the packed arrays and move the
     * pointer to the end of the current range.
     *
     * This doesn't give deterministic packed arrays, but it shouldn't
     * really matter because the order of children in the BVH is
     * deterministic. */
    start_index = spatial_free_index;
    spatial_free_index += range.size();

    /* Extend the arrays when needed. */
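    /* The reserve below grows the arrays in proportion to how much of
     * the build remains: at 25% progress the factor is 0.75, so roughly
     * 1.75 * range_end entries are reserved, amortizing re-allocations
     * caused by spatial split duplication. */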
    const size_t range_end = start_index + range.size();
    if (prim_type.size() < range_end) {
      /* Avoid extra re-allocations by pre-allocating a bigger array in
       * advance. */
      if (range_end >= prim_type.capacity()) {
        float progress = (float)progress_count / (float)progress_total;
        float factor = (1.0f - progress);
        const size_t reserve = (size_t)(range_end + (float)range_end * factor);
        prim_type.reserve(reserve);
        prim_index.reserve(reserve);
        prim_object.reserve(reserve);
        if (need_prim_time) {
          prim_time.reserve(reserve);
        }
      }

      prim_type.resize(range_end);
      prim_index.resize(range_end);
      prim_object.resize(range_end);
      if (need_prim_time) {
        prim_time.resize(range_end);
      }
    }
    /* Perform actual data copy. */
    if (new_leaf_data_size > 0) {
      memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
      memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
      memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
      if (need_prim_time) {
        memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data);
      }
    }
    spatial_spin_lock.unlock();
  }
  else {
    /* For the regular BVH builder we simply copy new data starting at the
     * range start. This is totally thread-safe, all threads are living
     * inside of their own range. */
    start_index = range.start();
    if (new_leaf_data_size > 0) {
      memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
      memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
      memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
      if (need_prim_time) {
        memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data);
      }
    }
  }

  /* So far leaves were created with zero-based indices into the arrays;
   * here we modify the indices to correspond to the actual packed array
   * start index. */
  for (int i = 0; i < num_leaves; ++i) {
    LeafNode *leaf = (LeafNode *)leaves[i];
    leaf->lo += start_index;
    leaf->hi += start_index;
  }

  /* Create leaf node for object. */
  if (num_leaves == 0 || ob_num) {
    /* Only create object leaf nodes if there are objects or no other
     * nodes created. */
    const BVHReference *ref = (ob_num) ? &object_references[0] : NULL;
    leaves[num_leaves] = create_object_leaf_nodes(ref, start_index + num_new_leaf_data, ob_num);
    ++num_leaves;
  }

  /* TODO(sergey): Need to take care of alignment when number of leaves
   * is more than 1. */
  if (num_leaves == 1) {
    /* Simplest case: single leaf, just return it.
     * In all the rest of the cases we'll be creating an intermediate
     * inner node with an appropriate bounding box. */
    return leaves[0];
  }
  else if (num_leaves == 2) {
    return new InnerNode(range.bounds(), leaves[0], leaves[1]);
  }
  else if (num_leaves == 3) {
    BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
    BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
    return new InnerNode(range.bounds(), leaves[0], inner);
  }
  else {
    /* Should be doing more branches if more primitive types are added. */
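    /* With four or five leaves the subtree is built as
     *   inner_c
     *   +-- inner_a (leaf 0, leaf 1)
     *   +-- inner_b (leaf 2, leaf 3)
     * and, in the five-leaf case, inner_c is joined with leaf 4 under
     * one more inner node. */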
    assert(num_leaves <= 5);
    BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
    BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
    BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
    BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
    BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
    BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
    if (num_leaves == 5) {
      return new InnerNode(range.bounds(), inner_c, leaves[4]);
    }
    return inner_c;
  }
}

/* Tree Rotations */

void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
{
  /* In tested scenes, this resulted in slightly slower ray-tracing, so it
   * is disabled for now. Could be an implementation bug, or it may depend
   * on the scene. */
  if (node)
    for (int i = 0; i < iterations; i++)
      rotate(node, max_depth);
}

void BVHBuild::rotate(BVHNode *node, int max_depth)
{
  /* Nothing to rotate if we reached a leaf node. */
  if (node->is_leaf() || max_depth < 0)
    return;

  InnerNode *parent = (InnerNode *)node;

  /* Rotate all children first. */
  for (size_t c = 0; c < 2; c++)
    rotate(parent->children[c], max_depth - 1);

  /* Compute current area of all children. */
  BoundBox bounds0 = parent->children[0]->bounds;
  BoundBox bounds1 = parent->children[1]->bounds;

  float area0 = bounds0.half_area();
  float area1 = bounds1.half_area();
  float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);

  /* Find the best rotation. We pick a target child of a first child, and
   * swap this with another child. We perform the best such swap. */
  float best_cost = FLT_MAX;
  int best_child = -1, best_target = -1, best_other = -1;

  for (size_t c = 0; c < 2; c++) {
    /* Ignore leaf nodes as we cannot descend into them. */
    if (parent->children[c]->is_leaf())
      continue;

    InnerNode *child = (InnerNode *)parent->children[c];
    BoundBox &other = (c == 0) ? bounds1 : bounds0;

    /* Transpose child bounds. */
    BoundBox target0 = child->children[0]->bounds;
    BoundBox target1 = child->children[1]->bounds;

    /* Compute cost for both possible swaps. */
    float cost0 = merge(other, target1).half_area() - child_area[c];
    float cost1 = merge(target0, other).half_area() - child_area[c];

    if (min(cost0, cost1) < best_cost) {
      best_child = (int)c;
      best_other = (int)(1 - c);

      if (cost0 < cost1) {
        best_cost = cost0;
        best_target = 0;
      }
      else {
        best_cost = cost1;
        best_target = 1;
      }
    }
  }

  /* If we did not find a swap that improves the SAH then do nothing. */
  if (best_cost >= 0)
    return;

  assert(best_child == 0 || best_child == 1);
  assert(best_target != -1);

  /* Perform the best found tree rotation. */
  InnerNode *child = (InnerNode *)parent->children[best_child];

  swap(parent->children[best_other], child->children[best_target]);
  child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
}

CCL_NAMESPACE_END