diff options
Diffstat (limited to 'intern')
42 files changed, 2313 insertions, 881 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index cb99ea3b499..35f97bf629f 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -85,10 +85,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Leave out caustics, resulting in a darker image with less noise", default=False, ) - cls.blur_caustics = FloatProperty( - name="Blur Caustics", - description="Blur caustics to reduce noise", - min=0.0, max=1.0, + cls.blur_glossy = FloatProperty( + name="Filter Glossy", + description="Adaptively blur glossy shaders after blurry bounces, to reduce noise at the cost of accuracy", + min=0.0, max=10.0, default=0.0, ) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 624d00b377d..0ed08589327 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -87,11 +87,11 @@ class CyclesRender_PT_integrator(CyclesButtonsPanel, Panel): sub.prop(cscene, "diffuse_bounces", text="Diffuse") sub.prop(cscene, "glossy_bounces", text="Glossy") sub.prop(cscene, "transmission_bounces", text="Transmission") - sub.prop(cscene, "no_caustics") - #row = col.row() - #row.prop(cscene, "blur_caustics") - #row.active = not cscene.no_caustics + col.separator() + + col.prop(cscene, "no_caustics") + col.prop(cscene, "blur_glossy") class CyclesRender_PT_film(CyclesButtonsPanel, Panel): @@ -178,10 +178,7 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel): col = split.column() col.prop(scene, "layers", text="Scene") - col.label(text="Material:") - col.prop(rl, "material_override", text="") - - col.prop(rl, "use_sky", "Use Environment") + col.prop(rl, "layers_exclude", text="Exclude") col = split.column() col.prop(rl, "layers", text="Layer") @@ -191,6 +188,16 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel): split = layout.split() col = split.column() + col.label(text="Material:") + col.prop(rl, 
"material_override", text="") + + col = split.column() + col.prop(rl, "samples") + col.prop(rl, "use_sky", "Use Environment") + + split = layout.split() + + col = split.column() col.label(text="Passes:") col.prop(rl, "use_pass_combined") col.prop(rl, "use_pass_z") diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index dc6c69e2904..5ece7aa26e2 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -218,12 +218,13 @@ void BlenderSession::render() scene->film->passes = passes; scene->film->tag_update(scene); - /* update session */ - session->reset(buffer_params, session_params.samples); - /* update scene */ sync->sync_data(b_v3d, b_iter->name().c_str()); + /* update session */ + int samples = sync->get_layer_samples(); + session->reset(buffer_params, (samples == 0)? session_params.samples: samples); + /* render */ session->start(); session->wait(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 5a286298774..41cd200d003 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -153,6 +153,8 @@ void BlenderSync::sync_integrator() integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows"); integrator->no_caustics = get_boolean(cscene, "no_caustics"); + integrator->filter_glossy = get_float(cscene, "blur_glossy"); + integrator->seed = get_int(cscene, "seed"); integrator->layer_flag = render_layer.layer; @@ -208,6 +210,7 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer) render_layer.holdout_layer = 0; render_layer.material_override = PointerRNA_NULL; render_layer.use_background = true; + render_layer.samples = 0; return; } } @@ -220,12 +223,13 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer) for(r.layers.begin(b_rlay); b_rlay != r.layers.end(); ++b_rlay) { if((!layer && first_layer) || (layer && 
b_rlay->name() == layer)) { render_layer.name = b_rlay->name(); - render_layer.scene_layer = get_layer(b_scene.layers()); + render_layer.scene_layer = get_layer(b_scene.layers()) & ~get_layer(b_rlay->layers_exclude()); render_layer.layer = get_layer(b_rlay->layers()); render_layer.holdout_layer = get_layer(b_rlay->layers_zmask()); render_layer.layer |= render_layer.holdout_layer; render_layer.material_override = b_rlay->material_override(); render_layer.use_background = b_rlay->use_sky(); + render_layer.samples = b_rlay->samples(); } first_layer = false; diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index d2550a1ffd7..ab8e4bd8d00 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -57,6 +57,7 @@ public: void sync_data(BL::SpaceView3D b_v3d, const char *layer = 0); void sync_camera(BL::Object b_override, int width, int height); void sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int width, int height); + int get_layer_samples() { return render_layer.samples; } /* get parameters */ static SceneParams get_scene_params(BL::Scene b_scene, bool background); @@ -108,7 +109,8 @@ private: RenderLayerInfo() : scene_layer(0), layer(0), holdout_layer(0), material_override(PointerRNA_NULL), - use_background(true) + use_background(true), + samples(0) {} string name; @@ -117,6 +119,7 @@ private: uint holdout_layer; BL::Material material_override; bool use_background; + int samples; } render_layer; }; diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index decc576fe51..131a7a1f750 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -10,17 +10,21 @@ set(INC set(SRC bvh.cpp + bvh_binning.cpp bvh_build.cpp bvh_node.cpp bvh_sort.cpp + bvh_split.cpp ) set(SRC_HEADERS bvh.h + bvh_binning.h bvh_build.h bvh_node.h bvh_params.h bvh_sort.h + bvh_split.h ) include_directories(${INC}) diff --git a/intern/cycles/bvh/bvh.cpp 
b/intern/cycles/bvh/bvh.cpp index c9bfa964332..15695dddf45 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -530,7 +530,7 @@ void RegularBVH::refit_nodes() { assert(!params.top_level); - BoundBox bbox; + BoundBox bbox = BoundBox::empty; uint visibility = 0; refit_node(0, (pack.is_leaf[0])? true: false, bbox, visibility); } @@ -572,7 +572,7 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility } else { /* refit inner node, set bbox from children */ - BoundBox bbox0, bbox1; + BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty; uint visibility0 = 0, visibility1 = 0; refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0); diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp new file mode 100644 index 00000000000..661541a8d23 --- /dev/null +++ b/intern/cycles/bvh/bvh_binning.cpp @@ -0,0 +1,223 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//#define __KERNEL_SSE__ + +#include <stdlib.h> + +#include "bvh_binning.h" + +#include "util_algorithm.h" +#include "util_boundbox.h" +#include "util_types.h" + +CCL_NAMESPACE_BEGIN + +/* SSE replacements */ + +__forceinline void prefetch_L1 (const void* ptr) { } +__forceinline void prefetch_L2 (const void* ptr) { } +__forceinline void prefetch_L3 (const void* ptr) { } +__forceinline void prefetch_NTA(const void* ptr) { } + +template<size_t src> __forceinline float extract(const int4& b) +{ return b[src]; } +template<size_t dst> __forceinline const float4 insert(const float4& a, const float b) +{ float4 r = a; r[dst] = b; return r; } + +__forceinline int get_best_dimension(const float4& bestSAH) +{ + // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH)); + + float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z)); + + if(bestSAH.x == minSAH) return 0; + else if(bestSAH.y == minSAH) return 1; + else return 2; +} + +/* BVH Object Binning */ + +BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims) +: BVHRange(job), splitSAH(FLT_MAX), dim(0), pos(0) +{ + /* compute number of bins to use and precompute scaling factor for binning */ + num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size())); + scale = rcp(cent_bounds().size()) * make_float3((float)num_bins); + + /* initialize binning counter and bounds */ + BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */ + int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */ + + for(size_t i = 0; i < num_bins; i++) { + bin_count[i] = make_int4(0); + bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty; + } + + /* map geometry to bins, unrolled once */ + { + ssize_t i; + + for(i = 0; i < ssize_t(size()) - 1; i += 2) { + prefetch_L2(&prims[start() + i + 8]); + + /* map even and odd primitive to bin */ + BVHReference prim0 = prims[start() + i + 0]; + BVHReference prim1 = prims[start() + i + 1]; + + int4 bin0 = 
get_bin(prim0.bounds()); + int4 bin1 = get_bin(prim1.bounds()); + + /* increase bounds for bins for even primitive */ + int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + + /* increase bounds of bins for odd primitive */ + int b10 = extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds()); + int b11 = extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds()); + int b12 = extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds()); + } + + /* for uneven number of primitives */ + if(i < ssize_t(size())) { + /* map primitive to bin */ + BVHReference prim0 = prims[start() + i]; + int4 bin0 = get_bin(prim0.bounds()); + + /* increase bounds of bins */ + int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + } + } + + /* sweep from right to left and compute parallel prefix of merged bounds */ + float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */ + float4 r_count[MAX_BINS]; /* number of primitives on the right */ + int4 count = make_int4(0); + + BoundBox bx = BoundBox::empty; + BoundBox by = BoundBox::empty; + BoundBox bz = BoundBox::empty; + + for(size_t i = num_bins - 1; i > 0; i--) { + count = count + bin_count[i]; + r_count[i] = blocks(count); + + bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area(); + by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area(); + bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area(); + } + + /* sweep from left to right and compute SAH */ + int4 ii = make_int4(1); + float4 bestSAH = 
make_float4(FLT_MAX); + int4 bestSplit = make_int4(-1); + + count = make_int4(0); + + bx = BoundBox::empty; + by = BoundBox::empty; + bz = BoundBox::empty; + + for(size_t i = 1; i < num_bins; i++, ii += make_int4(1)) { + count = count + bin_count[i-1]; + + bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area(); + by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area(); + bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area(); + + float4 lCount = blocks(count); + float4 lArea = make_float4(Ax,Ay,Az,Az); + float4 sah = lArea*lCount + r_area[i]*r_count[i]; + + bestSplit = select(sah < bestSAH,ii,bestSplit); + bestSAH = min(sah,bestSAH); + } + + int4 mask = float3_to_float4(cent_bounds().size()) <= make_float4(0.0f); + bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX); + + /* find best dimension */ + dim = get_best_dimension(bestSAH); + splitSAH = bestSAH[dim]; + pos = bestSplit[dim]; + leafSAH = bounds().half_area() * blocks(size()); +} + +void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const +{ + size_t N = size(); + + BoundBox lgeom_bounds = BoundBox::empty; + BoundBox rgeom_bounds = BoundBox::empty; + BoundBox lcent_bounds = BoundBox::empty; + BoundBox rcent_bounds = BoundBox::empty; + + ssize_t l = 0, r = N-1; + + while(l <= r) { + prefetch_L2(&prims[start() + l + 8]); + prefetch_L2(&prims[start() + r - 8]); + + BVHReference prim = prims[start() + l]; + float3 center = prim.bounds().center2(); + + if(get_bin(center)[dim] < pos) { + lgeom_bounds.grow(prim.bounds()); + lcent_bounds.grow(center); + l++; + } + else { + rgeom_bounds.grow(prim.bounds()); + rcent_bounds.grow(center); + swap(prims[start()+l],prims[start()+r]); + r--; + } + } + + /* finish */ + if(l != 0 && N-1-r != 0) { + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims); + return; + } 
+ + /* object median split if we did not make progress, can happen when all + primitives have same centroid */ + lgeom_bounds = BoundBox::empty; + rgeom_bounds = BoundBox::empty; + lcent_bounds = BoundBox::empty; + rcent_bounds = BoundBox::empty; + + for(size_t i = 0; i < N/2; i++) { + lgeom_bounds.grow(prims[start()+i].bounds()); + lcent_bounds.grow(prims[start()+i].bounds().center2()); + } + + for(size_t i = N/2; i < N; i++) { + rgeom_bounds.grow(prims[start()+i].bounds()); + rcent_bounds.grow(prims[start()+i].bounds().center2()); + } + + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/bvh/bvh_binning.h b/intern/cycles/bvh/bvh_binning.h new file mode 100644 index 00000000000..60742157055 --- /dev/null +++ b/intern/cycles/bvh/bvh_binning.h @@ -0,0 +1,86 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_BINNING_H__ +#define __BVH_BINNING_H__ + +#include "bvh_params.h" + +#include "util_types.h" + +CCL_NAMESPACE_BEGIN + +/* Single threaded object binner. Finds the split with the best SAH heuristic + * by testing for each dimension multiple partitionings for regularly spaced + * partition locations. 
A partitioning for a partition location is computed, + * by putting primitives whose centroid is on the left and right of the split + * location to different sets. The SAH is evaluated by computing the number of + * blocks occupied by the primitives in the partitions. */ + +class BVHObjectBinning : public BVHRange +{ +public: + __forceinline BVHObjectBinning() {} + BVHObjectBinning(const BVHRange& job, BVHReference *prims); + + void split(BVHReference *prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const; + + float splitSAH; /* SAH cost of the best split */ + float leafSAH; /* SAH cost of creating a leaf */ + +protected: + int dim; /* best split dimension */ + int pos; /* best split position */ + size_t num_bins; /* actual number of bins to use */ + float3 scale; /* scaling factor to compute bin */ + + enum { MAX_BINS = 32 }; + enum { LOG_BLOCK_SIZE = 2 }; + + /* computes the bin numbers for each dimension for a box. */ + __forceinline int4 get_bin(const BoundBox& box) const + { + int4 a = make_int4((box.center2() - cent_bounds().min)*scale - make_float3(0.5f)); + int4 mn = make_int4(0); + int4 mx = make_int4((int)num_bins-1); + + return clamp(a, mn, mx); + } + + /* computes the bin numbers for each dimension for a point. */ + __forceinline int4 get_bin(const float3& c) const + { + return make_int4((c - cent_bounds().min)*scale - make_float3(0.5f)); + } + + /* compute the number of blocks occupied for each dimension. */ + __forceinline float4 blocks(const int4& a) const + { + return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); + } + + /* compute the number of blocks occupied in one dimension. 
*/ + __forceinline int blocks(size_t a) const + { + return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); + } +}; + +CCL_NAMESPACE_END + +#endif + diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index 38674c2c561..c5b4f1d01ae 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -15,22 +15,36 @@ * limitations under the License. */ +#include "bvh_binning.h" #include "bvh_build.h" #include "bvh_node.h" #include "bvh_params.h" -#include "bvh_sort.h" +#include "bvh_split.h" #include "mesh.h" #include "object.h" #include "scene.h" -#include "util_algorithm.h" +#include "util_debug.h" #include "util_foreach.h" #include "util_progress.h" #include "util_time.h" CCL_NAMESPACE_BEGIN +/* BVH Build Task */ + +class BVHBuildTask : public Task { +public: + BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_) + : node(node_), child(child_), level(level_), range(range_) {} + + InnerNode *node; + int child; + int level; + BVHObjectBinning range; +}; + /* Constructor / Destructor */ BVHBuild::BVHBuild(const vector<Object*>& objects_, @@ -41,10 +55,10 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_, prim_object(prim_object_), params(params_), progress(progress_), - progress_start_time(0.0) + progress_start_time(0.0), + task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2)) { spatial_min_overlap = 0.0f; - progress_num_duplicates = 0; } BVHBuild::~BVHBuild() @@ -53,57 +67,63 @@ BVHBuild::~BVHBuild() /* Adding References */ -void BVHBuild::add_reference_mesh(NodeSpec& root, Mesh *mesh, int i) +void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i) { for(uint j = 0; j < mesh->triangles.size(); j++) { Mesh::Triangle t = mesh->triangles[j]; - Reference ref; + BoundBox bounds = BoundBox::empty; for(int k = 0; k < 3; k++) { float3 pt = mesh->verts[t.v[k]]; - ref.bounds.grow(pt); + bounds.grow(pt); } - if(ref.bounds.valid()) { - 
ref.prim_index = j; - ref.prim_object = i; - - references.push_back(ref); - root.bounds.grow(ref.bounds); + if(bounds.valid()) { + references.push_back(BVHReference(bounds, j, i)); + root.grow(bounds); + center.grow(bounds.center2()); } } } -void BVHBuild::add_reference_object(NodeSpec& root, Object *ob, int i) +void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i) { - Reference ref; - - ref.prim_index = -1; - ref.prim_object = i; - ref.bounds = ob->bounds; - - references.push_back(ref); - root.bounds.grow(ref.bounds); + references.push_back(BVHReference(ob->bounds, -1, i)); + root.grow(ob->bounds); + center.grow(ob->bounds.center2()); } -void BVHBuild::add_references(NodeSpec& root) +void BVHBuild::add_references(BVHRange& root) { - /* init root spec */ - root.num = 0; - root.bounds = BoundBox(); + /* reserve space for references */ + size_t num_alloc_references = 0; + + foreach(Object *ob, objects) { + if(params.top_level) { + if(ob->mesh->transform_applied) + num_alloc_references += ob->mesh->triangles.size(); + else + num_alloc_references++; + } + else + num_alloc_references += ob->mesh->triangles.size(); + } + + references.reserve(num_alloc_references); - /* add objects */ + /* add references from objects */ + BoundBox bounds = BoundBox::empty, center = BoundBox::empty; int i = 0; foreach(Object *ob, objects) { if(params.top_level) { if(ob->mesh->transform_applied) - add_reference_mesh(root, ob->mesh, i); + add_reference_mesh(bounds, center, ob->mesh, i); else - add_reference_object(root, ob, i); + add_reference_object(bounds, center, ob, i); } else - add_reference_mesh(root, ob->mesh, i); + add_reference_mesh(bounds, center, ob->mesh, i); i++; @@ -111,129 +131,213 @@ void BVHBuild::add_references(NodeSpec& root) } /* happens mostly on empty meshes */ - if(!root.bounds.valid()) - root.bounds.grow(make_float3(0.0f, 0.0f, 0.0f)); + if(!bounds.valid()) + bounds.grow(make_float3(0.0f, 0.0f, 0.0f)); - root.num = references.size(); 
+ root = BVHRange(bounds, center, 0, references.size()); } /* Build */ BVHNode* BVHBuild::run() { - NodeSpec root; + BVHRange root; /* add references */ add_references(root); - if(progress.get_cancel()) return NULL; + if(progress.get_cancel()) + return NULL; /* init spatial splits */ if(params.top_level) /* todo: get rid of this */ params.use_spatial_split = false; - spatial_min_overlap = root.bounds.area() * params.spatial_split_alpha; + spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha; spatial_right_bounds.clear(); - spatial_right_bounds.resize(max(root.num, (int)BVHParams::NUM_SPATIAL_BINS) - 1); + spatial_right_bounds.resize(max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1); /* init progress updates */ - progress_num_duplicates = 0; progress_start_time = time_dt(); + progress_count = 0; + progress_total = references.size(); + progress_original_total = progress_total; + + prim_index.resize(references.size()); + prim_object.resize(references.size()); /* build recursively */ - return build_node(root, 0, 0.0f, 1.0f); + BVHNode *rootnode; + + if(params.use_spatial_split) { + /* singlethreaded spatial split build */ + rootnode = build_node(root, 0); + } + else { + /* multithreaded binning build */ + BVHObjectBinning rootbin(root, &references[0]); + rootnode = build_node(rootbin, 0); + task_pool.wait(); + } + + /* delete if we cancelled */ + if(rootnode) { + if(progress.get_cancel()) { + rootnode->deleteSubtree(); + rootnode = NULL; + } + else if(!params.use_spatial_split) { + /*rotate(rootnode, 4, 5);*/ + rootnode->update_visibility(); + } + } + + return rootnode; } -void BVHBuild::progress_update(float progress_start, float progress_end) +void BVHBuild::progress_update() { if(time_dt() - progress_start_time < 0.25f) return; + + double progress_start = (double)progress_count/(double)progress_total; + double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total; - float duplicates = 
(float)progress_num_duplicates/(float)references.size(); string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%", progress_start*100.0f, duplicates*100.0f); progress.set_substatus(msg); - progress_start_time = time_dt(); + progress_start_time = time_dt(); } -BVHNode* BVHBuild::build_node(const NodeSpec& spec, int level, float progress_start, float progress_end) +void BVHBuild::thread_build_node(Task *task_, int thread_id) { - /* progress update */ - progress_update(progress_start, progress_end); - if(progress.get_cancel()) return NULL; + if(progress.get_cancel()) + return; - /* small enough or too deep => create leaf. */ - if(spec.num <= params.min_leaf_size || level >= BVHParams::MAX_DEPTH) - return create_leaf_node(spec); - - /* find split candidates. */ - float area = spec.bounds.area(); - float leafSAH = area * params.triangle_cost(spec.num); - float nodeSAH = area * params.node_cost(2); - ObjectSplit object = find_object_split(spec, nodeSAH); - SpatialSplit spatial; - - if(params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { - BoundBox overlap = object.left_bounds; - overlap.intersect(object.right_bounds); - - if(overlap.area() >= spatial_min_overlap) - spatial = find_spatial_split(spec, nodeSAH); - } + /* build nodes */ + BVHBuildTask *task = (BVHBuildTask*)task_; + BVHNode *node = build_node(task->range, task->level); + + /* set child in inner node */ + task->node->children[task->child] = node; - /* leaf SAH is the lowest => create leaf. */ - float minSAH = min(min(leafSAH, object.sah), spatial.sah); + /* update progress */ + if(task->range.size() < THREAD_TASK_SIZE) { + /*rotate(node, INT_MAX, 5);*/ - if(minSAH == leafSAH && spec.num <= params.max_leaf_size) - return create_leaf_node(spec); + thread_scoped_lock lock(build_mutex); - /* perform split. 
*/ - NodeSpec left, right; + progress_count += task->range.size(); + progress_update(); + } +} + +/* multithreaded binning builder */ +BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level) +{ + size_t size = range.size(); + float leafSAH = params.sah_triangle_cost * range.leafSAH; + float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_triangle_cost * range.splitSAH; - if(params.use_spatial_split && minSAH == spatial.sah) - do_spatial_split(left, right, spec, spatial); - if(!left.num || !right.num) - do_object_split(left, right, spec, object); + /* make leaf node when threshold reached or SAH tells us */ + if(params.small_enough_for_leaf(size, level) || (size <= params.max_leaf_size && leafSAH < splitSAH)) + return create_leaf_node(range); + + /* perform split */ + BVHObjectBinning left, right; + range.split(&references[0], left, right); /* create inner node. */ - progress_num_duplicates += left.num + right.num - spec.num; + InnerNode *inner; - float progress_mid = lerp(progress_start, progress_end, (float)right.num / (float)(left.num + right.num)); + if(range.size() < THREAD_TASK_SIZE) { + /* local build */ + BVHNode *leftnode = build_node(left, level + 1); + BVHNode *rightnode = build_node(right, level + 1); - BVHNode* rightNode = build_node(right, level + 1, progress_start, progress_mid); - if(progress.get_cancel()) { - if(rightNode) rightNode->deleteSubtree(); - return NULL; + inner = new InnerNode(range.bounds(), leftnode, rightnode); } + else { + /* threaded build */ + inner = new InnerNode(range.bounds()); + + task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true); + task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true); + } + + return inner; +} - BVHNode* leftNode = build_node(left, level + 1, progress_mid, progress_end); - if(progress.get_cancel()) { - if(leftNode) leftNode->deleteSubtree(); +/* single threaded spatial split builder */ +BVHNode* BVHBuild::build_node(const BVHRange& range, 
int level) +{ + /* progress update */ + progress_update(); + if(progress.get_cancel()) return NULL; + + /* small enough or too deep => create leaf. */ + if(params.small_enough_for_leaf(range.size(), level)) { + progress_count += range.size(); + return create_leaf_node(range); + } + + /* splitting test */ + BVHMixedSplit split(this, range, level); + + if(split.no_split) { + progress_count += range.size(); + return create_leaf_node(range); } + + /* do split */ + BVHRange left, right; + split.split(this, left, right, range); + + progress_total += left.size() + right.size() - range.size(); + size_t total = progress_total; + + /* left node */ + BVHNode *leftnode = build_node(left, level + 1); + + /* right node (modify start for splits) */ + right.set_start(right.start() + progress_total - total); + BVHNode *rightnode = build_node(right, level + 1); - return new InnerNode(spec.bounds, leftNode, rightNode); + /* inner node */ + return new InnerNode(range.bounds(), leftnode, rightnode); } -BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num) +/* Create Nodes */ + +BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num) { if(num == 0) { - BoundBox bounds; + BoundBox bounds = BoundBox::empty; return new LeafNode(bounds, 0, 0, 0); } else if(num == 1) { - prim_index.push_back(ref[0].prim_index); - prim_object.push_back(ref[0].prim_object); - uint visibility = objects[ref[0].prim_object]->visibility; - return new LeafNode(ref[0].bounds, visibility, prim_index.size()-1, prim_index.size()); + if(start == prim_index.size()) { + assert(params.use_spatial_split); + + prim_index.push_back(ref->prim_index()); + prim_object.push_back(ref->prim_object()); + } + else { + prim_index[start] = ref->prim_index(); + prim_object[start] = ref->prim_object(); + } + + uint visibility = objects[ref->prim_object()]->visibility; + return new LeafNode(ref->bounds(), visibility, start, start+1); } else { int mid = num/2; - BVHNode *leaf0 =
create_object_leaf_nodes(ref, mid); - BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, num-mid); + BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); + BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid); - BoundBox bounds; + BoundBox bounds = BoundBox::empty; bounds.grow(leaf0->m_bounds); bounds.grow(leaf1->m_bounds); @@ -241,310 +345,136 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num) } } -BVHNode* BVHBuild::create_leaf_node(const NodeSpec& spec) +BVHNode* BVHBuild::create_leaf_node(const BVHRange& range) { vector<int>& p_index = prim_index; vector<int>& p_object = prim_object; - BoundBox bounds; - int num = 0; + BoundBox bounds = BoundBox::empty; + int num = 0, ob_num = 0; uint visibility = 0; - for(int i = 0; i < spec.num; i++) { - if(references.back().prim_index != -1) { - p_index.push_back(references.back().prim_index); - p_object.push_back(references.back().prim_object); - bounds.grow(references.back().bounds); - visibility |= objects[references.back().prim_object]->visibility; - references.pop_back(); + for(int i = 0; i < range.size(); i++) { + BVHReference& ref = references[range.start() + i]; + + if(ref.prim_index() != -1) { + if(range.start() + num == prim_index.size()) { + assert(params.use_spatial_split); + + p_index.push_back(ref.prim_index()); + p_object.push_back(ref.prim_object()); + } + else { + p_index[range.start() + num] = ref.prim_index(); + p_object[range.start() + num] = ref.prim_object(); + } + + bounds.grow(ref.bounds()); + visibility |= objects[ref.prim_object()]->visibility; num++; } + else { + if(ob_num < i) + references[range.start() + ob_num] = ref; + ob_num++; + } } BVHNode *leaf = NULL; if(num > 0) { - leaf = new LeafNode(bounds, visibility, p_index.size() - num, p_index.size()); + leaf = new LeafNode(bounds, visibility, range.start(), range.start() + num); - if(num == spec.num) + if(num == range.size()) return leaf; } /* while there may be multiple triangles in a leaf, for 
object primitives - * we want them to be the only one, so we */ - int ob_num = spec.num - num; - const Reference *ref = (ob_num)? &references.back() - (ob_num - 1): NULL; - BVHNode *oleaf = create_object_leaf_nodes(ref, ob_num); - for(int i = 0; i < ob_num; i++) - references.pop_back(); + * we want there to be the only one, so we keep splitting */ + const BVHReference *ref = (ob_num)? &references[range.start()]: NULL; + BVHNode *oleaf = create_object_leaf_nodes(ref, range.start() + num, ob_num); if(leaf) - return new InnerNode(spec.bounds, leaf, oleaf); + return new InnerNode(range.bounds(), leaf, oleaf); else return oleaf; } -/* Object Split */ +/* Tree Rotations */ -BVHBuild::ObjectSplit BVHBuild::find_object_split(const NodeSpec& spec, float nodeSAH) +void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations) { - ObjectSplit split; - const Reference *ref_ptr = &references[references.size() - spec.num]; - - for(int dim = 0; dim < 3; dim++) { - /* sort references */ - bvh_reference_sort(references.size() - spec.num, references.size(), &references[0], dim); - - /* sweep right to left and determine bounds. */ - BoundBox right_bounds; - - for(int i = spec.num - 1; i > 0; i--) { - right_bounds.grow(ref_ptr[i].bounds); - spatial_right_bounds[i - 1] = right_bounds; - } - - /* sweep left to right and select lowest SAH. */ - BoundBox left_bounds; - - for(int i = 1; i < spec.num; i++) { - left_bounds.grow(ref_ptr[i - 1].bounds); - right_bounds = spatial_right_bounds[i - 1]; - - float sah = nodeSAH + - left_bounds.area() * params.triangle_cost(i) + - right_bounds.area() * params.triangle_cost(spec.num - i); - - if(sah < split.sah) { - split.sah = sah; - split.dim = dim; - split.num_left = i; - split.left_bounds = left_bounds; - split.right_bounds = right_bounds; - } - } - } - - return split; + /* in tested scenes, this resulted in slightly slower raytracing, so disabled + * it for now. 
could be implementation bug, or depend on the scene */ + if(node) + for(int i = 0; i < iterations; i++) + rotate(node, max_depth); } -void BVHBuild::do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split) +void BVHBuild::rotate(BVHNode *node, int max_depth) { - /* sort references according to split */ - int start = references.size() - spec.num; - int end = references.size(); /* todo: is this right? */ - - bvh_reference_sort(start, end, &references[0], split.dim); - - /* split node specs */ - left.num = split.num_left; - left.bounds = split.left_bounds; - right.num = spec.num - split.num_left; - right.bounds = split.right_bounds; -} - -/* Spatial Split */ - -BVHBuild::SpatialSplit BVHBuild::find_spatial_split(const NodeSpec& spec, float nodeSAH) -{ - /* initialize bins. */ - float3 origin = spec.bounds.min; - float3 binSize = (spec.bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); - float3 invBinSize = 1.0f / binSize; - - for(int dim = 0; dim < 3; dim++) { - for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { - SpatialBin& bin = spatial_bins[dim][i]; - - bin.bounds = BoundBox(); - bin.enter = 0; - bin.exit = 0; - } - } - - /* chop references into bins. */ - for(unsigned int refIdx = references.size() - spec.num; refIdx < references.size(); refIdx++) { - const Reference& ref = references[refIdx]; - float3 firstBinf = (ref.bounds.min - origin) * invBinSize; - float3 lastBinf = (ref.bounds.max - origin) * invBinSize; - int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); - int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); + /* nothing to rotate if we reached a leaf node. 
*/ + if(node->is_leaf() || max_depth < 0) + return; + + InnerNode *parent = (InnerNode*)node; - firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); - lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); + /* rotate all children first */ + for(size_t c = 0; c < 2; c++) + rotate(parent->children[c], max_depth-1); - for(int dim = 0; dim < 3; dim++) { - Reference currRef = ref; + /* compute current area of all children */ + BoundBox bounds0 = parent->children[0]->m_bounds; + BoundBox bounds1 = parent->children[1]->m_bounds; - for(int i = firstBin[dim]; i < lastBin[dim]; i++) { - Reference leftRef, rightRef; + float area0 = bounds0.half_area(); + float area1 = bounds1.half_area(); + float4 child_area = make_float4(area0, area1, 0.0f, 0.0f); - split_reference(leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); - spatial_bins[dim][i].bounds.grow(leftRef.bounds); - currRef = rightRef; - } + /* find best rotation. we pick a target child of a first child, and swap + * this with another child. we perform the best such swap. */ + float best_cost = FLT_MAX; + int best_child = -1, bets_target = -1, best_other = -1; - spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds); - spatial_bins[dim][firstBin[dim]].enter++; - spatial_bins[dim][lastBin[dim]].exit++; - } - } + for(size_t c = 0; c < 2; c++) { + /* ignore leaf nodes as we cannot descend into them */ + if(parent->children[c]->is_leaf()) + continue; - /* select best split plane. */ - SpatialSplit split; + InnerNode *child = (InnerNode*)parent->children[c]; + BoundBox& other = (c == 0)? bounds1: bounds0; - for(int dim = 0; dim < 3; dim++) { - /* sweep right to left and determine bounds.
*/ - BoundBox right_bounds; + /* transpose child bounds */ + BoundBox target0 = child->children[0]->m_bounds; + BoundBox target1 = child->children[1]->m_bounds; - for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { - right_bounds.grow(spatial_bins[dim][i].bounds); - spatial_right_bounds[i - 1] = right_bounds; - } + /* compute cost for both possible swaps */ + float cost0 = merge(other, target1).half_area() - child_area[c]; + float cost1 = merge(target0, other).half_area() - child_area[c]; - /* sweep left to right and select lowest SAH. */ - BoundBox left_bounds; - int leftNum = 0; - int rightNum = spec.num; + if(min(cost0,cost1) < best_cost) { + best_child = (int)c; + best_other = (int)(1-c); - for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { - left_bounds.grow(spatial_bins[dim][i - 1].bounds); - leftNum += spatial_bins[dim][i - 1].enter; - rightNum -= spatial_bins[dim][i - 1].exit; - - float sah = nodeSAH + - left_bounds.area() * params.triangle_cost(leftNum) + - spatial_right_bounds[i - 1].area() * params.triangle_cost(rightNum); - - if(sah < split.sah) { - split.sah = sah; - split.dim = dim; - split.pos = origin[dim] + binSize[dim] * (float)i; + if(cost0 < cost1) { + best_cost = cost0; + bets_target = 0; + } + else { + best_cost = cost0; + bets_target = 1; } } } - return split; -} - -void BVHBuild::do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split) -{ - /* Categorize references and compute bounds. 
- * - * Left-hand side: [left_start, left_end[ - * Uncategorized/split: [left_end, right_start[ - * Right-hand side: [right_start, refs.size()[ */ - - vector<Reference>& refs = references; - int left_start = refs.size() - spec.num; - int left_end = left_start; - int right_start = refs.size(); - - left.bounds = right.bounds = BoundBox(); - - for(int i = left_end; i < right_start; i++) { - if(refs[i].bounds.max[split.dim] <= split.pos) { - /* entirely on the left-hand side */ - left.bounds.grow(refs[i].bounds); - swap(refs[i], refs[left_end++]); - } - else if(refs[i].bounds.min[split.dim] >= split.pos) { - /* entirely on the right-hand side */ - right.bounds.grow(refs[i].bounds); - swap(refs[i--], refs[--right_start]); - } - } - - /* duplicate or unsplit references intersecting both sides. */ - while(left_end < right_start) { - /* split reference. */ - Reference lref, rref; - - split_reference(lref, rref, refs[left_end], split.dim, split.pos); - - /* compute SAH for duplicate/unsplit candidates. */ - BoundBox lub = left.bounds; // Unsplit to left: new left-hand bounds. - BoundBox rub = right.bounds; // Unsplit to right: new right-hand bounds. - BoundBox ldb = left.bounds; // Duplicate: new left-hand bounds. - BoundBox rdb = right.bounds; // Duplicate: new right-hand bounds. 
- - lub.grow(refs[left_end].bounds); - rub.grow(refs[left_end].bounds); - ldb.grow(lref.bounds); - rdb.grow(rref.bounds); - - float lac = params.triangle_cost(left_end - left_start); - float rac = params.triangle_cost(refs.size() - right_start); - float lbc = params.triangle_cost(left_end - left_start + 1); - float rbc = params.triangle_cost(refs.size() - right_start + 1); - - float unsplitLeftSAH = lub.area() * lbc + right.bounds.area() * rac; - float unsplitRightSAH = left.bounds.area() * lac + rub.area() * rbc; - float duplicateSAH = ldb.area() * lbc + rdb.area() * rbc; - float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); - - if(minSAH == unsplitLeftSAH) { - /* unsplit to left */ - left.bounds = lub; - left_end++; - } - else if(minSAH == unsplitRightSAH) { - /* unsplit to right */ - right.bounds = rub; - swap(refs[left_end], refs[--right_start]); - } - else { - /* duplicate */ - left.bounds = ldb; - right.bounds = rdb; - refs[left_end++] = lref; - refs.push_back(rref); - } - } - - left.num = left_end - left_start; - right.num = refs.size() - right_start; -} + /* if we did not find a swap that improves the SAH then do nothing */ + if(best_cost >= 0) + return; -void BVHBuild::split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos) -{ - /* initialize references. */ - left.prim_index = right.prim_index = ref.prim_index; - left.prim_object = right.prim_object = ref.prim_object; - left.bounds = right.bounds = BoundBox(); - - /* loop over vertices/edges. */ - Object *ob = objects[ref.prim_object]; - const Mesh *mesh = ob->mesh; - const int *inds = mesh->triangles[ref.prim_index].v; - const float3 *verts = &mesh->verts[0]; - const float3* v1 = &verts[inds[2]]; - - for(int i = 0; i < 3; i++) { - const float3* v0 = v1; - int vindex = inds[i]; - v1 = &verts[vindex]; - float v0p = (*v0)[dim]; - float v1p = (*v1)[dim]; - - /* insert vertex to the boxes it belongs to. 
*/ - if(v0p <= pos) - left.bounds.grow(*v0); - - if(v0p >= pos) - right.bounds.grow(*v0); - - /* edge intersects the plane => insert intersection to both boxes. */ - if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { - float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); - left.bounds.grow(t); - right.bounds.grow(t); - } - } + /* perform the best found tree rotation */ + InnerNode *child = (InnerNode*)parent->children[best_child]; - /* intersect with original bounds. */ - left.bounds.max[dim] = pos; - right.bounds.min[dim] = pos; - left.bounds.intersect(ref.bounds); - right.bounds.intersect(ref.bounds); + swap(parent->children[best_other], child->children[bets_target]); + child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds); } CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index 1fa1951d7f2..84e14632b4b 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -21,8 +21,10 @@ #include <float.h> #include "bvh.h" +#include "bvh_binning.h" #include "util_boundbox.h" +#include "util_task.h" #include "util_vector.h" CCL_NAMESPACE_BEGIN @@ -37,28 +39,7 @@ class Progress; class BVHBuild { public: - struct Reference - { - int prim_index; - int prim_object; - BoundBox bounds; - - Reference() - { - } - }; - - struct NodeSpec - { - int num; - BoundBox bounds; - - NodeSpec() - { - num = 0; - } - }; - + /* Constructor/Destructor */ BVHBuild( const vector<Object*>& objects, vector<int>& prim_index, @@ -70,63 +51,37 @@ public: BVHNode *run(); protected: + friend class BVHMixedSplit; + friend class BVHObjectSplit; + friend class BVHSpatialSplit; + /* adding references */ - void add_reference_mesh(NodeSpec& root, Mesh *mesh, int i); - void add_reference_object(NodeSpec& root, Object *ob, int i); - void add_references(NodeSpec& root); + void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i); + void add_reference_object(BoundBox& root, 
BoundBox& center, Object *ob, int i); + void add_references(BVHRange& root); /* building */ - BVHNode *build_node(const NodeSpec& spec, int level, float progress_start, float progress_end); - BVHNode *create_leaf_node(const NodeSpec& spec); - BVHNode *create_object_leaf_nodes(const Reference *ref, int num); - - void progress_update(float progress_start, float progress_end); - - /* object splits */ - struct ObjectSplit - { - float sah; - int dim; - int num_left; - BoundBox left_bounds; - BoundBox right_bounds; - - ObjectSplit() - : sah(FLT_MAX), dim(0), num_left(0) - { - } - }; - - ObjectSplit find_object_split(const NodeSpec& spec, float nodeSAH); - void do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split); - - /* spatial splits */ - struct SpatialSplit - { - float sah; - int dim; - float pos; - - SpatialSplit() - : sah(FLT_MAX), dim(0), pos(0.0f) - { - } - }; - - struct SpatialBin - { - BoundBox bounds; - int enter; - int exit; - }; - - SpatialSplit find_spatial_split(const NodeSpec& spec, float nodeSAH); - void do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split); - void split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos); + BVHNode *build_node(const BVHRange& range, int level); + BVHNode *build_node(const BVHObjectBinning& range, int level); + BVHNode *create_leaf_node(const BVHRange& range); + BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); + + /* threads */ + enum { THREAD_TASK_SIZE = 4096 }; + void thread_build_node(Task *task_, int thread_id); + thread_mutex build_mutex; + + /* progress */ + void progress_update(); + + /* tree rotations */ + void rotate(BVHNode *node, int max_depth); + void rotate(BVHNode *node, int max_depth, int iterations); /* objects and primitive references */ vector<Object*> objects; - vector<Reference> references; + vector<BVHReference> references; + int 
num_original_references; /* output primitive indexes and objects */ vector<int>& prim_index; @@ -138,12 +93,17 @@ protected: /* progress reporting */ Progress& progress; double progress_start_time; - int progress_num_duplicates; + size_t progress_count; + size_t progress_total; + size_t progress_original_total; /* spatial splitting */ float spatial_min_overlap; vector<BoundBox> spatial_right_bounds; - SpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS]; + BVHSpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS]; + + /* threads */ + TaskPool task_pool; }; CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp index 63683bae4a3..4edfb4b70a4 100644 --- a/intern/cycles/bvh/bvh_node.cpp +++ b/intern/cycles/bvh/bvh_node.cpp @@ -24,6 +24,8 @@ CCL_NAMESPACE_BEGIN +/* BVH Node */ + int BVHNode::getSubtreeSize(BVH_STAT stat) const { int cnt = 0; @@ -59,7 +61,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const void BVHNode::deleteSubtree() { for(int i=0;i<num_children();i++) - get_child(i)->deleteSubtree(); + if(get_child(i)) + get_child(i)->deleteSubtree(); delete this; } @@ -70,12 +73,27 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons for(int i=0;i<num_children();i++) { BVHNode *child = get_child(i); - SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.area()/m_bounds.area()); + SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.safe_area()/m_bounds.safe_area()); } return SAH; } +uint BVHNode::update_visibility() +{ + if(!is_leaf() && m_visibility == 0) { + InnerNode *inner = (InnerNode*)this; + BVHNode *child0 = inner->children[0]; + BVHNode *child1 = inner->children[1]; + + m_visibility = child0->update_visibility()|child1->update_visibility(); + } + + return m_visibility; +} + +/* Inner Node */ + void InnerNode::print(int depth) const { for(int i = 0; i < depth; i++) diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h index 
5e0a17a1193..5c00f7b7a38 100644 --- a/intern/cycles/bvh/bvh_node.h +++ b/intern/cycles/bvh/bvh_node.h @@ -49,8 +49,6 @@ public: virtual int num_triangles() const { return 0; } virtual void print(int depth = 0) const = 0; - float getArea() const { return m_bounds.area(); } - BoundBox m_bounds; uint m_visibility; @@ -58,6 +56,8 @@ public: int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const; float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const; void deleteSubtree(); + + uint update_visibility(); }; class InnerNode : public BVHNode @@ -66,9 +66,21 @@ public: InnerNode(const BoundBox& bounds, BVHNode* child0, BVHNode* child1) { m_bounds = bounds; - m_visibility = child0->m_visibility|child1->m_visibility; children[0] = child0; children[1] = child1; + + if(child0 && child1) + m_visibility = child0->m_visibility|child1->m_visibility; + else + m_visibility = 0; /* happens on build cancel */ + } + + InnerNode(const BoundBox& bounds) + { + m_bounds = bounds; + m_visibility = 0; + children[0] = NULL; + children[1] = NULL; } bool is_leaf() const { return false; } diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h index 38093438500..0cf5e905fea 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -18,6 +18,8 @@ #ifndef __BVH_PARAMS_H__ #define __BVH_PARAMS_H__ +#include "util_boundbox.h" + CCL_NAMESPACE_BEGIN /* BVH Parameters */ @@ -73,14 +75,97 @@ public: } /* SAH costs */ - float cost(int num_nodes, int num_tris) const + __forceinline float cost(int num_nodes, int num_tris) const { return node_cost(num_nodes) + triangle_cost(num_tris); } - float triangle_cost(int n) const + __forceinline float triangle_cost(int n) const { return n*sah_triangle_cost; } - float node_cost(int n) const + __forceinline float node_cost(int n) const { return n*sah_node_cost; } + + __forceinline bool small_enough_for_leaf(int size, int level) + { return (size <= min_leaf_size || level >= MAX_DEPTH); } +}; + 
+/* BVH Reference + * + * Reference to a primitive. Primitive index and object are sneakily packed + * into BoundBox to reduce memory usage and align nicely */ + +class BVHReference +{ +public: + __forceinline BVHReference() {} + + __forceinline BVHReference(const BoundBox& bounds_, int prim_index, int prim_object) + : rbounds(bounds_) + { + rbounds.min.w = __int_as_float(prim_index); + rbounds.max.w = __int_as_float(prim_object); + } + + __forceinline const BoundBox& bounds() const { return rbounds; } + __forceinline int prim_index() const { return __float_as_int(rbounds.min.w); } + __forceinline int prim_object() const { return __float_as_int(rbounds.max.w); } + +protected: + BoundBox rbounds; +}; + +/* BVH Range + * + * Build range used during construction, to indicate the bounds and place in + * the reference array of a subset of primitives. Again uses trickery to pack + * integers into BoundBox for alignment purposes. */ + +class BVHRange +{ +public: + __forceinline BVHRange() + { + rbounds.min.w = __int_as_float(0); + rbounds.max.w = __int_as_float(0); + } + + __forceinline BVHRange(const BoundBox& bounds_, int start_, int size_) + : rbounds(bounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_) + : rbounds(bounds_), cbounds(cbounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); } + + __forceinline const BoundBox& bounds() const { return rbounds; } + __forceinline const BoundBox& cent_bounds() const { return cbounds; } + __forceinline int start() const { return __float_as_int(rbounds.min.w); } + __forceinline int size() const { return __float_as_int(rbounds.max.w); } + __forceinline int end() const { return start() + size(); } + +protected: + BoundBox rbounds; + BoundBox cbounds; +}; +
+/* BVH Spatial Bin */ + +struct BVHSpatialBin +{ + BoundBox bounds; + int enter; + int exit; + + __forceinline BVHSpatialBin() + { + } }; CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp index ee4531a4843..bef384be592 100644 --- a/intern/cycles/bvh/bvh_sort.cpp +++ b/intern/cycles/bvh/bvh_sort.cpp @@ -32,23 +32,23 @@ public: dim = dim_; } - bool operator()(const BVHBuild::Reference& ra, const BVHBuild::Reference& rb) + bool operator()(const BVHReference& ra, const BVHReference& rb) { - float ca = ra.bounds.min[dim] + ra.bounds.max[dim]; - float cb = rb.bounds.min[dim] + rb.bounds.max[dim]; + float ca = ra.bounds().min[dim] + ra.bounds().max[dim]; + float cb = rb.bounds().min[dim] + rb.bounds().max[dim]; if(ca < cb) return true; else if(ca > cb) return false; - else if(ra.prim_object < rb.prim_object) return true; - else if(ra.prim_object > rb.prim_object) return false; - else if(ra.prim_index < rb.prim_index) return true; - else if(ra.prim_index > rb.prim_index) return false; + else if(ra.prim_object() < rb.prim_object()) return true; + else if(ra.prim_object() > rb.prim_object()) return false; + else if(ra.prim_index() < rb.prim_index()) return true; + else if(ra.prim_index() > rb.prim_index()) return false; return false; } }; -void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim) +void bvh_reference_sort(int start, int end, BVHReference *data, int dim) { sort(data+start, data+end, BVHReferenceCompare(dim)); } diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h index f0676948146..ba35ba3fae7 100644 --- a/intern/cycles/bvh/bvh_sort.h +++ b/intern/cycles/bvh/bvh_sort.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN -void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim); +void bvh_reference_sort(int start, int end, BVHReference *data, int dim); CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp new file mode 
100644 index 00000000000..263c5834428 --- /dev/null +++ b/intern/cycles/bvh/bvh_split.cpp @@ -0,0 +1,293 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh_build.h" +#include "bvh_split.h" +#include "bvh_sort.h" + +#include "mesh.h" +#include "object.h" + +#include "util_algorithm.h" + +CCL_NAMESPACE_BEGIN + +/* Object Split */ + +BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH) +: sah(FLT_MAX), dim(0), num_left(0), left_bounds(BoundBox::empty), right_bounds(BoundBox::empty) +{ + const BVHReference *ref_ptr = &builder->references[range.start()]; + float min_sah = FLT_MAX; + + for(int dim = 0; dim < 3; dim++) { + /* sort references */ + bvh_reference_sort(range.start(), range.end(), &builder->references[0], dim); + + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + + for(int i = range.size() - 1; i > 0; i--) { + right_bounds.grow(ref_ptr[i].bounds()); + builder->spatial_right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. 
*/ + BoundBox left_bounds = BoundBox::empty; + + for(int i = 1; i < range.size(); i++) { + left_bounds.grow(ref_ptr[i - 1].bounds()); + right_bounds = builder->spatial_right_bounds[i - 1]; + + float sah = nodeSAH + + left_bounds.safe_area() * builder->params.triangle_cost(i) + + right_bounds.safe_area() * builder->params.triangle_cost(range.size() - i); + + if(sah < min_sah) { + min_sah = sah; + + this->sah = sah; + this->dim = dim; + this->num_left = i; + this->left_bounds = left_bounds; + this->right_bounds = right_bounds; + } + } + } +} + +void BVHObjectSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) +{ + /* sort references according to split */ + bvh_reference_sort(range.start(), range.end(), &builder->references[0], this->dim); + + /* split node ranges */ + left = BVHRange(this->left_bounds, range.start(), this->num_left); + right = BVHRange(this->right_bounds, left.end(), range.size() - this->num_left); + +} + +/* Spatial Split */ + +BVHSpatialSplit::BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH) +: sah(FLT_MAX), dim(0), pos(0.0f) +{ + /* initialize bins. */ + float3 origin = range.bounds().min; + float3 binSize = (range.bounds().max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); + float3 invBinSize = 1.0f / binSize; + + for(int dim = 0; dim < 3; dim++) { + for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { + BVHSpatialBin& bin = builder->spatial_bins[dim][i]; + + bin.bounds = BoundBox::empty; + bin.enter = 0; + bin.exit = 0; + } + } + + /* chop references into bins. 
*/ + for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) { + const BVHReference& ref = builder->references[refIdx]; + float3 firstBinf = (ref.bounds().min - origin) * invBinSize; + float3 lastBinf = (ref.bounds().max - origin) * invBinSize; + int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); + int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); + + firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); + lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); + + for(int dim = 0; dim < 3; dim++) { + BVHReference currRef = ref; + + for(int i = firstBin[dim]; i < lastBin[dim]; i++) { + BVHReference leftRef, rightRef; + + split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); + builder->spatial_bins[dim][i].bounds.grow(leftRef.bounds()); + currRef = rightRef; + } + + builder->spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds()); + builder->spatial_bins[dim][firstBin[dim]].enter++; + builder->spatial_bins[dim][lastBin[dim]].exit++; + } + } + + /* select best split plane. */ + for(int dim = 0; dim < 3; dim++) { + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + + for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { + right_bounds.grow(builder->spatial_bins[dim][i].bounds); + builder->spatial_right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. 
*/ + BoundBox left_bounds = BoundBox::empty; + int leftNum = 0; + int rightNum = range.size(); + + for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { + left_bounds.grow(builder->spatial_bins[dim][i - 1].bounds); + leftNum += builder->spatial_bins[dim][i - 1].enter; + rightNum -= builder->spatial_bins[dim][i - 1].exit; + + float sah = nodeSAH + + left_bounds.safe_area() * builder->params.triangle_cost(leftNum) + + builder->spatial_right_bounds[i - 1].safe_area() * builder->params.triangle_cost(rightNum); + + if(sah < this->sah) { + this->sah = sah; + this->dim = dim; + this->pos = origin[dim] + binSize[dim] * (float)i; + } + } + } +} + +void BVHSpatialSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) +{ + /* Categorize references and compute bounds. + * + * Left-hand side: [left_start, left_end[ + * Uncategorized/split: [left_end, right_start[ + * Right-hand side: [right_start, right_end[ */ + + vector<BVHReference>& refs = builder->references; + int left_start = range.start(); + int left_end = left_start; + int right_start = range.end(); + int right_end = range.end(); + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + for(int i = left_end; i < right_start; i++) { + if(refs[i].bounds().max[this->dim] <= this->pos) { + /* entirely on the left-hand side */ + left_bounds.grow(refs[i].bounds()); + swap(refs[i], refs[left_end++]); + } + else if(refs[i].bounds().min[this->dim] >= this->pos) { + /* entirely on the right-hand side */ + right_bounds.grow(refs[i].bounds()); + swap(refs[i--], refs[--right_start]); + } + } + + /* duplicate or unsplit references intersecting both sides. */ + while(left_end < right_start) { + /* split reference. */ + BVHReference lref, rref; + + split_reference(builder, lref, rref, refs[left_end], this->dim, this->pos); + + /* compute SAH for duplicate/unsplit candidates. */ + BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds.
+ BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds. + BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds. + BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds. + + lub.grow(refs[left_end].bounds()); + rub.grow(refs[left_end].bounds()); + ldb.grow(lref.bounds()); + rdb.grow(rref.bounds()); + + float lac = builder->params.triangle_cost(left_end - left_start); + float rac = builder->params.triangle_cost(right_end - right_start); + float lbc = builder->params.triangle_cost(left_end - left_start + 1); + float rbc = builder->params.triangle_cost(right_end - right_start + 1); + + float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; + float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; + float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc; + float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); + + if(minSAH == unsplitLeftSAH) { + /* unsplit to left */ + left_bounds = lub; + left_end++; + } + else if(minSAH == unsplitRightSAH) { + /* unsplit to right */ + right_bounds = rub; + swap(refs[left_end], refs[--right_start]); + } + else { + /* duplicate */ + left_bounds = ldb; + right_bounds = rdb; + refs[left_end++] = lref; + refs.insert(refs.begin() + right_end, rref); + right_end++; + } + } + + left = BVHRange(left_bounds, left_start, left_end - left_start); + right = BVHRange(right_bounds, right_start, right_end - right_start); +} + +void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos) +{ + /* initialize boundboxes */ + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + /* loop over vertices/edges. 
*/ + Object *ob = builder->objects[ref.prim_object()]; + const Mesh *mesh = ob->mesh; + const int *inds = mesh->triangles[ref.prim_index()].v; + const float3 *verts = &mesh->verts[0]; + const float3* v1 = &verts[inds[2]]; + + for(int i = 0; i < 3; i++) { + const float3* v0 = v1; + int vindex = inds[i]; + v1 = &verts[vindex]; + float v0p = (*v0)[dim]; + float v1p = (*v1)[dim]; + + /* insert vertex to the boxes it belongs to. */ + if(v0p <= pos) + left_bounds.grow(*v0); + + if(v0p >= pos) + right_bounds.grow(*v0); + + /* edge intersects the plane => insert intersection to both boxes. */ + if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { + float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); + left_bounds.grow(t); + right_bounds.grow(t); + } + } + + /* intersect with original bounds. */ + left_bounds.max[dim] = pos; + right_bounds.min[dim] = pos; + left_bounds.intersect(ref.bounds()); + right_bounds.intersect(ref.bounds()); + + /* set referecnes */ + left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object()); + right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object()); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h new file mode 100644 index 00000000000..1f4befbe8e2 --- /dev/null +++ b/intern/cycles/bvh/bvh_split.h @@ -0,0 +1,110 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_SPLIT_H__ +#define __BVH_SPLIT_H__ + +#include "bvh_build.h" +#include "bvh_params.h" + +CCL_NAMESPACE_BEGIN + +class BVHBuild; + +/* Object Split */ + +class BVHObjectSplit +{ +public: + float sah; + int dim; + int num_left; + BoundBox left_bounds; + BoundBox right_bounds; + + BVHObjectSplit() {} + BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH); + + void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range); +}; + +/* Spatial Split */ + +class BVHSpatialSplit +{ +public: + float sah; + int dim; + float pos; + + BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f) {} + BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH); + + void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range); + void split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos); +}; + +/* Mixed Object-Spatial Split */ + +class BVHMixedSplit +{ +public: + BVHObjectSplit object; + BVHSpatialSplit spatial; + + float leafSAH; + float nodeSAH; + float minSAH; + + bool no_split; + + __forceinline BVHMixedSplit(BVHBuild *builder, const BVHRange& range, int level) + { + /* find split candidates. */ + float area = range.bounds().safe_area(); + + leafSAH = area * builder->params.triangle_cost(range.size()); + nodeSAH = area * builder->params.node_cost(2); + + object = BVHObjectSplit(builder, range, nodeSAH); + + if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { + BoundBox overlap = object.left_bounds; + overlap.intersect(object.right_bounds); + + if(overlap.safe_area() >= builder->spatial_min_overlap) + spatial = BVHSpatialSplit(builder, range, nodeSAH); + } + + /* leaf SAH is the lowest => create leaf. 
*/ + minSAH = min(min(leafSAH, object.sah), spatial.sah); + no_split = (minSAH == leafSAH && range.size() <= builder->params.max_leaf_size); + } + + __forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) + { + if(builder->params.use_spatial_split && minSAH == spatial.sah) + spatial.split(builder, left, right, range); + if(!left.size() || !right.size()) + object.split(builder, left, right, range); + } +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_SPLIT_H__ */ + diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index cceec8b8e5c..42dda1180c7 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -58,15 +58,6 @@ void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) split(tasks, num); } -void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num) -{ - list<DeviceTask> tasks; - split(tasks, num); - - foreach(DeviceTask& task, tasks) - queue.push(task); -} - void DeviceTask::split(list<DeviceTask>& tasks, int num) { if(type == SHADER) { diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index af2567498d9..87f255e54e7 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -25,6 +25,7 @@ #include "util_list.h" #include "util_string.h" +#include "util_task.h" #include "util_thread.h" #include "util_types.h" #include "util_vector.h" @@ -66,7 +67,7 @@ public: /* Device Task */ -class DeviceTask { +class DeviceTask : public Task { public: typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; Type type; @@ -87,7 +88,6 @@ public: DeviceTask(Type type = PATH_TRACE); void split(list<DeviceTask>& tasks, int num); - void split(ThreadQueue<DeviceTask>& tasks, int num); void split_max_size(list<DeviceTask>& tasks, int max_size); }; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index da977ed8472..ec84047c44f 100644 --- a/intern/cycles/device/device_cpu.cpp +++ 
b/intern/cycles/device/device_cpu.cpp @@ -40,35 +40,21 @@ CCL_NAMESPACE_BEGIN class CPUDevice : public Device { public: - vector<thread*> threads; - ThreadQueue<DeviceTask> tasks; + TaskPool task_pool; KernelGlobals *kg; CPUDevice(int threads_num) + : task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2)) { kg = kernel_globals_create(); /* do now to avoid thread issues */ system_cpu_support_optimized(); - - if(threads_num == 0) - threads_num = system_cpu_thread_count(); - - threads.resize(threads_num); - - for(size_t i = 0; i < threads.size(); i++) - threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i)); } ~CPUDevice() { - tasks.stop(); - - foreach(thread *t, threads) { - t->join(); - delete t; - } - + task_pool.stop(); kernel_globals_free(kg); } @@ -127,25 +113,21 @@ public: #endif } - void thread_run(int t) + void thread_run(Task *task_, int thread_id) { - DeviceTask task; - - while(tasks.worker_wait_pop(task)) { - if(task.type == DeviceTask::PATH_TRACE) - thread_path_trace(task); - else if(task.type == DeviceTask::TONEMAP) - thread_tonemap(task); - else if(task.type == DeviceTask::SHADER) - thread_shader(task); - - tasks.worker_done(); - } + DeviceTask *task = (DeviceTask*)task_; + + if(task->type == DeviceTask::PATH_TRACE) + thread_path_trace(*task); + else if(task->type == DeviceTask::TONEMAP) + thread_tonemap(*task); + else if(task->type == DeviceTask::SHADER) + thread_shader(*task); } void thread_path_trace(DeviceTask& task) { - if(tasks.worker_cancel()) + if(task_pool.cancelled()) return; #ifdef WITH_OSL @@ -160,7 +142,7 @@ public: kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y, task.offset, task.stride); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -172,7 +154,7 @@ public: kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y, task.offset, task.stride); - if(tasks.worker_cancel()) + 
if(task_pool.cancelled()) break; } } @@ -214,7 +196,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -224,7 +206,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -239,17 +221,22 @@ public: { /* split task into smaller ones, more than number of threads for uneven workloads where some parts of the image render slower than others */ - task.split(tasks, threads.size()*10); + list<DeviceTask> tasks; + + task.split(tasks, TaskScheduler::num_threads()*10); + + foreach(DeviceTask& task, tasks) + task_pool.push(new DeviceTask(task)); } void task_wait() { - tasks.wait_done(); + task_pool.wait(); } void task_cancel() { - tasks.cancel(); + task_pool.cancel(); } }; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 1f69f2c53fa..9f7d65e640b 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -257,13 +257,14 @@ public: void task_add(DeviceTask& task) { - ThreadQueue<DeviceTask> tasks; + list<DeviceTask> tasks; task.split(tasks, devices.size()); foreach(SubDevice& sub, devices) { - DeviceTask subtask; + if(!tasks.empty()) { + DeviceTask subtask = tasks.front(); + tasks.pop_front(); - if(tasks.worker_wait_pop(subtask)) { if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state]; if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba]; diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 9a52531eec0..6c3ade1c531 100644 --- 
a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -266,7 +266,7 @@ __device_inline void path_radiance_accum_background(PathRadiance *L, float3 thro #endif } -__device_inline float3 path_radiance_sum(PathRadiance *L) +__device_inline float3 path_radiance_sum(KernelGlobals *kg, PathRadiance *L) { #ifdef __PASSES__ if(L->use_light_pass) { @@ -283,9 +283,14 @@ __device_inline float3 path_radiance_sum(PathRadiance *L) L->indirect_glossy *= L->indirect; L->indirect_transmission *= L->indirect; - return L->emission + L->background + float3 L_sum = L->emission + L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission; + + if(!kernel_data.background.transparent) + L_sum += L->background; + + return L_sum; } else return L->emission; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index ff12e85375c..8ebac177277 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -223,6 +223,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R path_radiance_init(&L, kernel_data.film.use_light_pass); + float min_ray_pdf = FLT_MAX; float ray_pdf = 0.0f; PathState state; int rng_offset = PRNG_BASE_NUM; @@ -239,13 +240,17 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R /* eval background shader if nothing hit */ if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) { L_transparent += average(throughput); + +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + break; } + #ifdef __BACKGROUND__ - else { - /* sample background shader */ - float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf); - path_radiance_accum_background(&L, throughput, L_background, state.bounce); - } + /* sample background shader */ + float3 L_background = indirect_background(kg, &ray, state.flag, 
ray_pdf); + path_radiance_accum_background(&L, throughput, L_background, state.bounce); #endif break; @@ -259,6 +264,18 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput); + /* blurring of bsdf after bounces, for rays that have a small likelihood + of following this particular path (diffuse, rough glossy) */ + if(kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf; + + if(blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; + shader_bsdf_blur(kg, &sd, blur_roughness); + } + } + + /* holdout */ #ifdef __HOLDOUT__ if((sd.flag & SD_HOLDOUT) && (state.flag & PATH_RAY_CAMERA)) { float3 holdout_weight = shader_holdout_eval(kg, &sd); @@ -378,8 +395,10 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label); /* set labels */ - if(!(label & LABEL_TRANSPARENT)) + if(!(label & LABEL_TRANSPARENT)) { ray_pdf = bsdf_pdf; + min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf); + } /* update path state */ path_state_next(kg, &state, label); @@ -394,7 +413,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R #endif } - float3 L_sum = path_radiance_sum(&L); + float3 L_sum = path_radiance_sum(kg, &L); #ifdef __CLAMP_SAMPLE__ path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 391dcd12dad..102a2bb036d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -516,6 +516,7 @@ typedef struct KernelIntegrator { /* caustics */ int no_caustics; + float filter_glossy; /* seed */ int seed; @@ -525,9 +526,6 @@ typedef struct KernelIntegrator { /* clamp */ float sample_clamp; - - /* padding */ - int pad; } 
KernelIntegrator; typedef struct KernelBVH { diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index f494b6d66e1..98f8734aed2 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -40,6 +40,15 @@ __device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, float *stack data = sd->P; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = transform_direction(&tfm, sd->N); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; @@ -85,6 +94,15 @@ __device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, floa data = sd->P + sd->dP.dx; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = transform_direction(&tfm, sd->N); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; @@ -133,6 +151,15 @@ __device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, floa data = sd->P + sd->dP.dy; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = normalize(transform_direction(&tfm, sd->N)); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 68eb39bdd29..fa7c211b5f9 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -119,6 +119,7 @@ typedef enum NodeLightPath { } NodeLightPath; typedef enum NodeTexCoord { + NODE_TEXCO_NORMAL, NODE_TEXCO_OBJECT, NODE_TEXCO_CAMERA, NODE_TEXCO_WINDOW, diff --git a/intern/cycles/render/integrator.cpp 
b/intern/cycles/render/integrator.cpp index 6e6d30f3879..c1f066df10c 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -41,6 +41,7 @@ Integrator::Integrator() transparent_shadows = false; no_caustics = false; + filter_glossy = 0.0f; seed = 0; layer_flag = ~0; sample_clamp = 0.0f; @@ -81,6 +82,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene) kintegrator->transparent_shadows = transparent_shadows; kintegrator->no_caustics = no_caustics; + kintegrator->filter_glossy = (filter_glossy == 0.0f)? FLT_MAX: 1.0f/filter_glossy; + kintegrator->seed = hash_int(seed); kintegrator->layer_flag = layer_flag << PATH_RAY_LAYER_SHIFT; @@ -119,6 +122,7 @@ bool Integrator::modified(const Integrator& integrator) transparent_probalistic == integrator.transparent_probalistic && transparent_shadows == integrator.transparent_shadows && no_caustics == integrator.no_caustics && + filter_glossy == integrator.filter_glossy && layer_flag == integrator.layer_flag && seed == integrator.seed && sample_clamp == integrator.sample_clamp); diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index abbbaca894c..0817fcaa457 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -41,6 +41,7 @@ public: bool transparent_shadows; bool no_caustics; + float filter_glossy; int seed; int layer_flag; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index a7eb365f983..0ce16e65621 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -43,6 +43,7 @@ Mesh::Mesh() transform_applied = false; transform_negative_scaled = false; displacement_method = DISPLACE_BUMP; + bounds = BoundBox::empty; bvh = NULL; @@ -96,7 +97,7 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_) void Mesh::compute_bounds() { - BoundBox bnds; + BoundBox bnds = BoundBox::empty; size_t verts_size = verts.size(); for(size_t i = 0; i < 
verts_size; i++) @@ -697,6 +698,8 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen progress.set_status(msg, "Building BVH"); mesh->compute_bvh(&scene->params, progress); + + i++; } if(progress.get_cancel()) return; @@ -704,8 +707,6 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen mesh->need_update = false; mesh->need_update_rebuild = false; } - - i++; } foreach(Shader *shader, scene->shaders) diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index db696993737..d71438ebae1 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1503,6 +1503,7 @@ TextureCoordinateNode::TextureCoordinateNode() { add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL, true); add_output("Generated", SHADER_SOCKET_POINT); + add_output("Normal", SHADER_SOCKET_NORMAL); add_output("UV", SHADER_SOCKET_POINT); add_output("Object", SHADER_SOCKET_POINT); add_output("Camera", SHADER_SOCKET_POINT); @@ -1551,6 +1552,12 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler) } } + out = output("Normal"); + if(!out->links.empty()) { + compiler.stack_assign(out); + compiler.add_node(texco_node, NODE_TEXCO_NORMAL, out->stack_offset); + } + out = output("UV"); if(!out->links.empty()) { int attr = compiler.attribute(Attribute::STD_UV); diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 5f7a5810c09..28645d856a8 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -37,6 +37,7 @@ Object::Object() tfm = transform_identity(); visibility = ~0; pass_id = 0; + bounds = BoundBox::empty; } Object::~Object() diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 676f42be790..34a0c0ff877 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -27,6 +27,7 @@ #include "util_foreach.h" #include "util_function.h" +#include "util_task.h" #include 
"util_time.h" CCL_NAMESPACE_BEGIN @@ -37,6 +38,8 @@ Session::Session(const SessionParams& params_) { device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background); + TaskScheduler::init(params.threads); + device = Device::create(params.device, params.background, params.threads); buffers = new RenderBuffers(device); display = new DisplayBuffer(device); @@ -88,6 +91,8 @@ Session::~Session() delete display; delete scene; delete device; + + TaskScheduler::exit(); } void Session::start() diff --git a/intern/cycles/subd/subd_patch.cpp b/intern/cycles/subd/subd_patch.cpp index ff477296c7e..f6acc358959 100644 --- a/intern/cycles/subd/subd_patch.cpp +++ b/intern/cycles/subd/subd_patch.cpp @@ -93,7 +93,7 @@ void LinearQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float BoundBox LinearQuadPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 4; i++) bbox.grow(hull[i]); @@ -115,7 +115,7 @@ void LinearTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f BoundBox LinearTrianglePatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 3; i++) bbox.grow(hull[i]); @@ -132,7 +132,7 @@ void BicubicPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float v) BoundBox BicubicPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 16; i++) bbox.grow(hull[i]); @@ -152,7 +152,7 @@ void BicubicTangentPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f BoundBox BicubicTangentPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 16; i++) bbox.grow(hull[i]); @@ -205,7 +205,7 @@ void GregoryQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, floa BoundBox GregoryQuadPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 20; i++) bbox.grow(hull[i]); @@ -276,7 +276,7 @@ void GregoryTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, BoundBox 
GregoryTrianglePatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 20; i++) bbox.grow(hull[i]); diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 9182ee4cbe1..87bd84b4e0f 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC util_path.cpp util_string.cpp util_system.cpp + util_task.cpp util_time.cpp util_transform.cpp ) @@ -50,6 +51,7 @@ set(SRC_HEADERS util_set.h util_string.h util_system.h + util_task.h util_thread.h util_time.h util_transform.h diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h index bb1df0b220f..9511b48e103 100644 --- a/intern/cycles/util/util_boundbox.h +++ b/intern/cycles/util/util_boundbox.h @@ -23,6 +23,7 @@ #include <float.h> #include "util_math.h" +#include "util_string.h" #include "util_transform.h" #include "util_types.h" @@ -35,45 +36,81 @@ class BoundBox public: float3 min, max; - BoundBox(void) + __forceinline BoundBox() { - min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX); - max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); } - BoundBox(const float3& min_, const float3& max_) + __forceinline BoundBox(const float3& pt) + : min(pt), max(pt) + { + } + + __forceinline BoundBox(const float3& min_, const float3& max_) : min(min_), max(max_) { } - void grow(const float3& pt) + static struct empty_t {} empty; + + __forceinline BoundBox(empty_t) + : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) + { + } + + __forceinline void grow(const float3& pt) { min = ccl::min(min, pt); max = ccl::max(max, pt); } - void grow(const BoundBox& bbox) + __forceinline void grow(const BoundBox& bbox) { grow(bbox.min); grow(bbox.max); } - void intersect(const BoundBox& bbox) + __forceinline void intersect(const BoundBox& bbox) { min = ccl::max(min, bbox.min); max = ccl::min(max, bbox.max); } - float area(void) const + /* todo: avoid using this */ + 
__forceinline float safe_area() const { - if(!valid()) + if(!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) return 0.0f; + return area(); + } + + __forceinline float area() const + { + return half_area()*2.0f; + } + + __forceinline float half_area() const + { float3 d = max - min; - return dot(d, d)*2.0f; + return (d.x*d.z + d.y*d.z + d.x*d.y); + } + + __forceinline float3 center() const + { + return 0.5f*(min + max); } - bool valid(void) const + __forceinline float3 center2() const + { + return min + max; + } + + __forceinline float3 size() const + { + return max - min; + } + + __forceinline bool valid() const { return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && @@ -82,7 +119,7 @@ public: BoundBox transformed(const Transform *tfm) { - BoundBox result; + BoundBox result = BoundBox::empty; for(int i = 0; i < 8; i++) { float3 p; @@ -98,6 +135,31 @@ public: } }; +__forceinline BoundBox merge(const BoundBox& bbox, const float3& pt) +{ + return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); +} + +__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b) +{ + return BoundBox(min(a.min, b.min), max(a.max, b.max)); +} + +__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b, const BoundBox& c, const BoundBox& d) +{ + return merge(merge(a, b), merge(c, d)); +} + +__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b) +{ + return BoundBox(max(a.min, b.min), min(a.max, b.max)); +} + +__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b, const BoundBox& c) +{ + return intersect(a, intersect(b, c)); +} + CCL_NAMESPACE_END #endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 019dede07fa..25d81481d12 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -182,93 +182,74 @@ __device_inline float average(const float2 a) __device_inline float2 
operator-(const float2 a) { - float2 r = {-a.x, -a.y}; - return r; + return make_float2(-a.x, -a.y); } __device_inline float2 operator*(const float2 a, const float2 b) { - float2 r = {a.x*b.x, a.y*b.y}; - return r; + return make_float2(a.x*b.x, a.y*b.y); } __device_inline float2 operator*(const float2 a, float f) { - float2 r = {a.x*f, a.y*f}; - return r; + return make_float2(a.x*f, a.y*f); } __device_inline float2 operator*(float f, const float2 a) { - float2 r = {a.x*f, a.y*f}; - return r; + return make_float2(a.x*f, a.y*f); } __device_inline float2 operator/(float f, const float2 a) { - float2 r = {f/a.x, f/a.y}; - return r; + return make_float2(f/a.x, f/a.y); } __device_inline float2 operator/(const float2 a, float f) { float invf = 1.0f/f; - float2 r = {a.x*invf, a.y*invf}; - return r; + return make_float2(a.x*invf, a.y*invf); } __device_inline float2 operator/(const float2 a, const float2 b) { - float2 r = {a.x/b.x, a.y/b.y}; - return r; + return make_float2(a.x/b.x, a.y/b.y); } __device_inline float2 operator+(const float2 a, const float2 b) { - float2 r = {a.x+b.x, a.y+b.y}; - return r; + return make_float2(a.x+b.x, a.y+b.y); } __device_inline float2 operator-(const float2 a, const float2 b) { - float2 r = {a.x-b.x, a.y-b.y}; - return r; + return make_float2(a.x-b.x, a.y-b.y); } __device_inline float2 operator+=(float2& a, const float2 b) { - a.x += b.x; - a.y += b.y; - return a; + return a = a + b; } __device_inline float2 operator*=(float2& a, const float2 b) { - a.x *= b.x; - a.y *= b.y; - return a; + return a = a * b; } __device_inline float2 operator*=(float2& a, float f) { - a.x *= f; - a.y *= f; - return a; + return a = a * f; } __device_inline float2 operator/=(float2& a, const float2 b) { - a.x /= b.x; - a.y /= b.y; - return a; + return a = a / b; } __device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - return a; + return a = a * invf; } @@ -314,14 +295,12 @@ __device_inline bool 
operator!=(const float2 a, const float2 b) __device_inline float2 min(float2 a, float2 b) { - float2 r = {min(a.x, b.x), min(a.y, b.y)}; - return r; + return make_float2(min(a.x, b.x), min(a.y, b.y)); } __device_inline float2 max(float2 a, float2 b) { - float2 r = {max(a.x, b.x), max(a.y, b.y)}; - return r; + return make_float2(max(a.x, b.x), max(a.y, b.y)); } __device_inline float2 clamp(float2 a, float2 mn, float2 mx) @@ -361,112 +340,78 @@ __device_inline float2 interp(float2 a, float2 b, float t) /* Float3 Vector */ -__device_inline bool is_zero(const float3 a) -{ - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); -} - -__device_inline float average(const float3 a) -{ - return (a.x + a.y + a.z)*(1.0f/3.0f); -} - #ifndef __KERNEL_OPENCL__ __device_inline float3 operator-(const float3 a) { - float3 r = make_float3(-a.x, -a.y, -a.z); - return r; + return make_float3(-a.x, -a.y, -a.z); } __device_inline float3 operator*(const float3 a, const float3 b) { - float3 r = make_float3(a.x*b.x, a.y*b.y, a.z*b.z); - return r; + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); } __device_inline float3 operator*(const float3 a, float f) { - float3 r = make_float3(a.x*f, a.y*f, a.z*f); - return r; + return make_float3(a.x*f, a.y*f, a.z*f); } __device_inline float3 operator*(float f, const float3 a) { - float3 r = make_float3(a.x*f, a.y*f, a.z*f); - return r; + return make_float3(a.x*f, a.y*f, a.z*f); } __device_inline float3 operator/(float f, const float3 a) { - float3 r = make_float3(f/a.x, f/a.y, f/a.z); - return r; + return make_float3(f/a.x, f/a.y, f/a.z); } __device_inline float3 operator/(const float3 a, float f) { float invf = 1.0f/f; - float3 r = make_float3(a.x*invf, a.y*invf, a.z*invf); - return r; + return make_float3(a.x*invf, a.y*invf, a.z*invf); } __device_inline float3 operator/(const float3 a, const float3 b) { - float3 r = make_float3(a.x/b.x, a.y/b.y, a.z/b.z); - return r; + return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); } __device_inline float3 
operator+(const float3 a, const float3 b) { - float3 r = make_float3(a.x+b.x, a.y+b.y, a.z+b.z); - return r; + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); } __device_inline float3 operator-(const float3 a, const float3 b) { - float3 r = make_float3(a.x-b.x, a.y-b.y, a.z-b.z); - return r; + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); } __device_inline float3 operator+=(float3& a, const float3 b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; - return a; + return a = a + b; } __device_inline float3 operator*=(float3& a, const float3 b) { - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - return a; + return a = a * b; } __device_inline float3 operator*=(float3& a, float f) { - a.x *= f; - a.y *= f; - a.z *= f; - return a; + return a = a * f; } __device_inline float3 operator/=(float3& a, const float3 b) { - a.x /= b.x; - a.y /= b.y; - a.z /= b.z; - return a; + return a = a / b; } __device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - a.z *= invf; - return a; + return a = a * invf; } __device_inline float dot(const float3 a, const float3 b) @@ -506,7 +451,11 @@ __device_inline float3 normalize_len(const float3 a, float *t) __device_inline bool operator==(const float3 a, const float3 b) { +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; +#else return (a.x == b.x && a.y == b.y && a.z == b.z); +#endif } __device_inline bool operator!=(const float3 a, const float3 b) @@ -516,14 +465,20 @@ __device_inline bool operator!=(const float3 a, const float3 b) __device_inline float3 min(float3 a, float3 b) { - float3 r = make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); - return r; +#ifdef __KERNEL_SSE__ + return _mm_min_ps(a.m128, b.m128); +#else + return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif } __device_inline float3 max(float3 a, float3 b) { - float3 r = make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); - return r; +#ifdef __KERNEL_SSE__ + return 
_mm_max_ps(a.m128, b.m128); +#else + return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif } __device_inline float3 clamp(float3 a, float3 mn, float3 mx) @@ -533,7 +488,12 @@ __device_inline float3 clamp(float3 a, float3 mn, float3 mx) __device_inline float3 fabs(float3 a) { +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + return _mm_and_ps(a.m128, mask); +#else return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); +#endif } #endif @@ -555,6 +515,25 @@ __device_inline void print_float3(const char *label, const float3& a) printf("%s: %.8f %.8f %.8f\n", label, a.x, a.y, a.z); } +__device_inline float reduce_add(const float3& a) +{ +#ifdef __KERNEL_SSE__ + return (a.x + a.y + a.z); +#else + return (a.x + a.y + a.z); +#endif +} + +__device_inline float3 rcp(const float3& a) +{ +#ifdef __KERNEL_SSE__ + float4 r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +#else + return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); +#endif +} + #endif __device_inline float3 interp(float3 a, float3 b, float t) @@ -562,122 +541,258 @@ __device_inline float3 interp(float3 a, float3 b, float t) return a + t*(b - a); } +__device_inline bool is_zero(const float3 a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float3(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); +#endif +} + +__device_inline float average(const float3 a) +{ + return reduce_add(a)*(1.0f/3.0f); +} + /* Float4 Vector */ -#ifndef __KERNEL_OPENCL__ +#ifdef __KERNEL_SSE__ -__device_inline bool is_zero(const float4& a) +template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> __forceinline const float4 shuffle(const float4& b) { - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0))); } -__device_inline float average(const float4& a) +template<> __forceinline const 
float4 shuffle<0, 0, 2, 2>(const float4& b) { - return (a.x + a.y + a.z + a.w)*(1.0f/4.0f); + return _mm_moveldup_ps(b); } +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) +{ + return _mm_movehdup_ps(b); +} + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) +{ + return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))); +} + +#endif + +#ifndef __KERNEL_OPENCL__ + __device_inline float4 operator-(const float4& a) { - float4 r = {-a.x, -a.y, -a.z, -a.w}; - return r; +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + return _mm_xor_ps(a.m128, mask); +#else + return make_float4(-a.x, -a.y, -a.z, -a.w); +#endif } __device_inline float4 operator*(const float4& a, const float4& b) { - float4 r = {a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_mul_ps(a.m128, b.m128); +#else + return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); +#endif } __device_inline float4 operator*(const float4& a, float f) { - float4 r = {a.x*f, a.y*f, a.z*f, a.w*f}; - return r; +#ifdef __KERNEL_SSE__ + return a * make_float4(f); +#else + return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); +#endif } __device_inline float4 operator*(float f, const float4& a) { - float4 r = {a.x*f, a.y*f, a.z*f, a.w*f}; - return r; + return a * f; +} + +__device_inline float4 rcp(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +#else + return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); +#endif } __device_inline float4 operator/(const float4& a, float f) { - float invf = 1.0f/f; - float4 r = {a.x*invf, a.y*invf, a.z*invf, a.w*invf}; - return r; + return a * (1.0f/f); } __device_inline float4 operator/(const float4& a, const float4& b) { - float4 r = {a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w}; - return r; +#ifdef __KERNEL_SSE__ + return a * rcp(b); +#else + return make_float4(a.x/b.x, a.y/b.y, 
a.z/b.z, a.w/b.w); +#endif + } __device_inline float4 operator+(const float4& a, const float4& b) { - float4 r = {a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_add_ps(a.m128, b.m128); +#else + return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); +#endif } __device_inline float4 operator-(const float4& a, const float4& b) { - float4 r = {a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_sub_ps(a.m128, b.m128); +#else + return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); +#endif } __device_inline float4 operator+=(float4& a, const float4& b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; - a.w += b.w; - return a; + return a = a + b; } __device_inline float4 operator*=(float4& a, const float4& b) { - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - a.w *= b.w; - return a; + return a = a * b; } __device_inline float4 operator/=(float4& a, float f) { - float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - a.z *= invf; - a.w *= invf; - return a; + return a = a / f; } -__device_inline float dot(const float4& a, const float4& b) +__device_inline int4 operator<(const float4& a, const float4& b) { - return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +#endif +} + +__device_inline int4 operator>=(float4 a, float4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#endif +} + +__device_inline int4 operator<=(const float4& a, const float4& b) +{ +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); +#endif +} + +__device_inline bool operator==(const float4 a, const float4 b) +{ +#ifdef 
__KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +#endif } __device_inline float4 cross(const float4& a, const float4& b) { - float4 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f}; - return r; +#ifdef __KERNEL_SSE__ + return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); +#else + return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); +#endif } __device_inline float4 min(float4 a, float4 b) { +#ifdef __KERNEL_SSE__ + return _mm_min_ps(a.m128, b.m128); +#else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +#endif } __device_inline float4 max(float4 a, float4 b) { +#ifdef __KERNEL_SSE__ + return _mm_max_ps(a.m128, b.m128); +#else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +#endif } #endif #ifndef __KERNEL_GPU__ +__device_inline float4 select(const int4& mask, const float4& a, const float4& b) +{ +#ifdef __KERNEL_SSE__ + /* blendv is sse4, and apparently broken on vs2008 */ + return _mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b)); /* todo: avoid cvt */ +#else + return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); +#endif +} + +__device_inline float4 reduce_min(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = min(shuffle<1,0,3,2>(a), a); + return min(shuffle<2,3,0,1>(h), h); +#else + return make_float4(min(min(a.x, a.y), min(a.z, a.w))); +#endif +} + +__device_inline float4 reduce_max(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = max(shuffle<1,0,3,2>(a), a); + return max(shuffle<2,3,0,1>(h), h); +#else + return make_float4(max(max(a.x, a.y), max(a.z, a.w))); +#endif +} + +#if 0 +__device_inline float4 reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return shuffle<2,3,0,1>(h) + h; +#else + return make_float4((a.x + a.y) + (a.z + a.w)); +#endif +} +#endif + +__device_inline float reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */ +#else + return ((a.x + a.y) + (a.z + a.w)); +#endif +} + __device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, a.x, a.y, a.z, a.w); @@ -685,26 +800,67 @@ __device_inline void print_float4(const char *label, const float4& a) #endif +#ifndef __KERNEL_OPENCL__ + +__device_inline bool is_zero(const float4& a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +#endif +} + +__device_inline float average(const float4& a) +{ + return reduce_add(a) * 0.25f; +} + +__device_inline float dot(const float4& a, const float4& b) +{ + return reduce_add(a * b); +} + +#endif + /* Int3 */ #ifndef __KERNEL_OPENCL__ +__device_inline int3 min(int3 a, int3 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_min_epi32(a.m128, b.m128); +#else + return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + __device_inline int3 max(int3 a, int3 b) { - int3 r = {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)}; - return r; +#ifdef __KERNEL_SSE__ + 
return _mm_max_epi32(a.m128, b.m128); +#else + return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif } __device_inline int3 clamp(const int3& a, int mn, int mx) { - int3 r = {clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)}; - return r; +#ifdef __KERNEL_SSE__ + return min(max(a, make_int3(mn)), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); +#endif } __device_inline int3 clamp(const int3& a, int3& mn, int mx) { - int3 r = {clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)}; - return r; +#ifdef __KERNEL_SSE__ + return min(max(a, mn), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); +#endif } #endif @@ -720,16 +876,63 @@ __device_inline void print_int3(const char *label, const int3& a) /* Int4 */ -#ifndef __KERNEL_OPENCL__ +#ifndef __KERNEL_GPU__ -__device_inline int4 operator>=(float4 a, float4 b) +__device_inline int4 operator+(const int4& a, const int4& b) { - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#ifdef __KERNEL_SSE__ + return _mm_add_epi32(a.m128, b.m128); +#else + return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); +#endif +} + +__device_inline int4 operator+=(int4& a, const int4& b) +{ + return a = a + b; } +__device_inline int4 operator>>(const int4& a, int i) +{ +#ifdef __KERNEL_SSE__ + return _mm_srai_epi32(a.m128, i); +#else + return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif +} -#ifndef __KERNEL_GPU__ +__device_inline int4 min(int4 a, int4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_min_epi32(a.m128, b.m128); +#else + return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +#endif +} + +__device_inline int4 max(int4 a, int4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_max_epi32(a.m128, b.m128); +#else + return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +#endif +} + +__device_inline int4 clamp(const int4& a, 
const int4& mn, const int4& mx) +{ + return min(max(a, mn), mx); +} + +__device_inline int4 select(const int4& mask, const int4& a, const int4& b) +{ +#ifdef __KERNEL_SSE__ + __m128 m = _mm_cvtepi32_ps(mask); + return _mm_castps_si128(_mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b)))); /* todo: avoid cvt */ +#else + return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w); +#endif +} __device_inline void print_int4(const char *label, const int4& a) { diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp new file mode 100644 index 00000000000..6da9a70ec0c --- /dev/null +++ b/intern/cycles/util/util_task.cpp @@ -0,0 +1,223 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "util_debug.h" +#include "util_foreach.h" +#include "util_system.h" +#include "util_task.h" + +CCL_NAMESPACE_BEGIN + +/* Task Pool */ + +TaskPool::TaskPool(const TaskRunFunction& run_) +{ + num = 0; + num_done = 0; + + do_cancel = false; + + run = run_; +} + +TaskPool::~TaskPool() +{ + stop(); +} + +void TaskPool::push(Task *task, bool front) +{ + TaskScheduler::Entry entry; + + entry.task = task; + entry.pool = this; + + TaskScheduler::push(entry, front); +} + +void TaskPool::wait() +{ + thread_scoped_lock lock(done_mutex); + + while(num_done != num) + done_cond.wait(lock); +} + +void TaskPool::cancel() +{ + TaskScheduler::clear(this); + + do_cancel = true; + wait(); + do_cancel = false; +} + +void TaskPool::stop() +{ + TaskScheduler::clear(this); + + assert(num_done == num); +} + +bool TaskPool::cancelled() +{ + return do_cancel; +} + +void TaskPool::done_increase(int done) +{ + done_mutex.lock(); + num_done += done; + done_mutex.unlock(); + + assert(num_done <= num); + done_cond.notify_all(); +} + +/* Task Scheduler */ + +thread_mutex TaskScheduler::mutex; +int TaskScheduler::users = 0; +vector<thread*> TaskScheduler::threads; +volatile bool TaskScheduler::do_exit = false; + +list<TaskScheduler::Entry> TaskScheduler::queue; +thread_mutex TaskScheduler::queue_mutex; +thread_condition_variable TaskScheduler::queue_cond; + +void TaskScheduler::init(int num_threads) +{ + thread_scoped_lock lock(mutex); + + /* multiple cycles instances can use this task scheduler, sharing the same + threads, so we keep track of the number of users. 
*/ + if(users == 0) { + do_exit = false; + + /* launch threads that will be waiting for work */ + if(num_threads == 0) + num_threads = system_cpu_thread_count(); + + threads.resize(num_threads); + + for(size_t i = 0; i < threads.size(); i++) + threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i)); + } + + users++; +} + +/* Release one user of the shared scheduler. When the last user exits, all + idle worker threads are woken up, joined and deleted. */ +void TaskScheduler::exit() +{ + thread_scoped_lock lock(mutex); + + users--; + + if(users == 0) { + /* stop all waiting threads: do_exit must be set and signalled while + holding queue_mutex, otherwise a worker in thread_wait_pop() can test + the flag, miss the notification and then block forever in wait(), + which would hang the join() below */ + TaskScheduler::queue_mutex.lock(); + do_exit = true; + TaskScheduler::queue_cond.notify_all(); + TaskScheduler::queue_mutex.unlock(); + + /* delete threads */ + foreach(thread *t, threads) { + t->join(); + delete t; + } + + threads.clear(); + } +} + +bool TaskScheduler::thread_wait_pop(Entry& entry) +{ + thread_scoped_lock lock(queue_mutex); + + while(queue.empty() && !do_exit) + queue_cond.wait(lock); + + if(queue.empty()) { + assert(do_exit); + return false; + } + + entry = queue.front(); + queue.pop_front(); + + return true; +} + +void TaskScheduler::thread_run(int thread_id) +{ + Entry entry; + + /* todo: test affinity/denormal mask */ + + /* keep popping off tasks */ + while(thread_wait_pop(entry)) { + /* run task */ + entry.pool->run(entry.task, thread_id); + + /* delete task */ + delete entry.task; + + /* notify pool task was done */ + entry.pool->done_increase(1); + } +} + +void TaskScheduler::push(Entry& entry, bool front) +{ + /* add entry to queue */ + TaskScheduler::queue_mutex.lock(); + if(front) + TaskScheduler::queue.push_front(entry); + else + TaskScheduler::queue.push_back(entry); + entry.pool->num++; + TaskScheduler::queue_mutex.unlock(); + + TaskScheduler::queue_cond.notify_one(); +} + +void TaskScheduler::clear(TaskPool *pool) +{ + thread_scoped_lock lock(TaskScheduler::queue_mutex); + + /* erase all tasks from this pool from the queue */ + list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin(); + int done = 0; + + while(it != TaskScheduler::queue.end()) { + TaskScheduler::Entry& entry = *it; + + if(entry.pool == pool) { + done++; + delete 
entry.task; + + it = TaskScheduler::queue.erase(it); + } + else + it++; + } + + /* notify done */ + pool->done_increase(done); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h new file mode 100644 index 00000000000..acdb2cb50a2 --- /dev/null +++ b/intern/cycles/util/util_task.h @@ -0,0 +1,122 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __UTIL_TASK_H__ +#define __UTIL_TASK_H__ + +#include "util_list.h" +#include "util_thread.h" +#include "util_vector.h" + +CCL_NAMESPACE_BEGIN + +class Task; +class TaskPool; +class TaskScheduler; + +typedef boost::function<void(Task*,int)> TaskRunFunction; + +/* Task + * + * Base class for tasks to be executed in threads. */ + +class Task +{ +public: + Task() {}; + virtual ~Task() {} +}; + +/* Task Pool + * + * Pool of tasks that will be executed by the central TaskScheduler. For each + * pool, we can wait for all tasks to be done, or cancel them before they are + * done. 
+ * + * The run callback that actually executes the task may be created like this: + * function_bind(&MyClass::task_execute, this, _1, _2) */ + +class TaskPool +{ +public: + TaskPool(const TaskRunFunction& run); + ~TaskPool(); + + void push(Task *task, bool front = false); + + void wait(); /* wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker threads running */ + void stop(); /* clear queued tasks of this pool, worker threads keep running */ + + bool cancelled(); /* for worker threads, test if cancelled */ + +protected: + friend class TaskScheduler; + + void done_increase(int done); + + TaskRunFunction run; + + thread_mutex done_mutex; + thread_condition_variable done_cond; + + volatile int num, num_done; + volatile bool do_cancel; +}; + +/* Task Scheduler + * + * Central scheduler that holds running threads ready to execute tasks. A single + * queue holds the tasks from all pools. */ + +class TaskScheduler +{ +public: + static void init(int num_threads = 0); + static void exit(); + + static int num_threads() { return threads.size(); } + +protected: + friend class TaskPool; + + struct Entry { + Task *task; + TaskPool *pool; + }; + + static thread_mutex mutex; + static int users; + static vector<thread*> threads; + static volatile bool do_exit; + + static list<Entry> queue; + static thread_mutex queue_mutex; + static thread_condition_variable queue_cond; + + static void thread_run(int thread_id); + static bool thread_wait_pop(Entry& entry); + + static void push(Entry& entry, bool front); + static void clear(TaskPool *pool); +}; + +CCL_NAMESPACE_END + +#endif + diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 6836be203f5..3d15b342fe5 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -69,133 +69,6 @@ protected: bool joined; }; -/* Thread Safe Queue to pass tasks from one thread to another. Tasks should be - * pushed into the queue, while the worker thread waits to pop the next task - * off the queue. 
Once all tasks are into the queue, calling stop() will stop - * the worker threads from waiting for more tasks once all tasks are done. */ - -template<typename T> class ThreadQueue -{ -public: - ThreadQueue() - { - tot = 0; - tot_done = 0; - do_stop = false; - do_cancel = false; - } - - /* Main thread functions */ - - /* push a task to be executed */ - void push(const T& value) - { - thread_scoped_lock lock(queue_mutex); - queue.push(value); - tot++; - lock.unlock(); - - queue_cond.notify_one(); - } - - /* wait until all tasks are done */ - void wait_done() - { - thread_scoped_lock lock(done_mutex); - - while(tot_done != tot) - done_cond.wait(lock); - } - - /* stop all worker threads */ - void stop() - { - clear(); - do_stop = true; - queue_cond.notify_all(); - } - - /* cancel all tasks, but keep worker threads running */ - void cancel() - { - clear(); - do_cancel = true; - wait_done(); - do_cancel = false; - } - - /* Worker thread functions - * - * while(queue.worker_wait_pop(task)) { - * for(..) { - * ... do work ... 
- * - * if(queue.worker_cancel()) - * break; - * } - * - * queue.worker_done(); - * } - */ - - bool worker_wait_pop(T& value) - { - thread_scoped_lock lock(queue_mutex); - - while(queue.empty() && !do_stop) - queue_cond.wait(lock); - - if(queue.empty()) - return false; - - value = queue.front(); - queue.pop(); - - return true; - } - - void worker_done() - { - thread_scoped_lock lock(done_mutex); - tot_done++; - lock.unlock(); - - assert(tot_done <= tot); - - done_cond.notify_all(); - } - - bool worker_cancel() - { - return do_cancel; - } - -protected: - void clear() - { - thread_scoped_lock lock(queue_mutex); - - while(!queue.empty()) { - thread_scoped_lock done_lock(done_mutex); - tot_done++; - done_lock.unlock(); - - queue.pop(); - } - - done_cond.notify_all(); - } - - std::queue<T> queue; - thread_mutex queue_mutex; - thread_mutex done_mutex; - thread_condition_variable queue_cond; - thread_condition_variable done_cond; - volatile bool do_stop; - volatile bool do_cancel; - volatile int tot, tot_done; -}; - /* Thread Local Storage * * Boost implementation is a bit slow, and Mac OS X __thread is not supported diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 61bc36ae888..0fd26825911 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -129,23 +129,26 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) Transform transform_inverse(const Transform& tfm) { - union { Transform T; float M[4][4]; } R, M; - - R.T = transform_identity(); - M.T = tfm; + Transform tfmR = transform_identity(); + float M[4][4], R[4][4]; - if(!transform_matrix4_gj_inverse(R.M, M.M)) { + memcpy(R, &tfmR, sizeof(R)); + memcpy(M, &tfm, sizeof(M)); + + if(!transform_matrix4_gj_inverse(R, M)) { /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should never be in this situation, but try to invert it anyway with tweak */ - M.M[0][0] += 1e-8f; - M.M[1][1] += 1e-8f; - M.M[2][2] += 1e-8f; + M[0][0] += 1e-8f; + M[1][1] += 1e-8f; + M[2][2] += 1e-8f; - if(!transform_matrix4_gj_inverse(R.M, M.M)) + if(!transform_matrix4_gj_inverse(R, M)) return transform_identity(); } - return R.T; + memcpy(&tfmR, R, sizeof(R)); + + return tfmR; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index efdda98571a..cf167707e47 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -36,23 +36,37 @@ #define __shared #define __constant -#ifdef __GNUC__ -#define __device_inline static inline __attribute__((always_inline)) -#else +#ifdef _WIN32 #define __device_inline static __forceinline +#define __align(...) __declspec(align(__VA_ARGS__)) +#else +#define __device_inline static inline __attribute__((always_inline)) +#define __forceinline inline __attribute__((always_inline)) +#define __align(...) __attribute__((aligned(__VA_ARGS__))) #endif #endif +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) +#define __KERNEL_64_BIT__ +#endif + /* SIMD Types */ -/* not needed yet, will be for qbvh -#ifndef __KERNEL_GPU__ +/* not enabled, globally applying it just gives slowdown, + * but useful for testing. 
*/ +//#define __KERNEL_SSE__ +#ifdef __KERNEL_SSE__ -#include <emmintrin.h> -#include <xmmintrin.h> +#include <xmmintrin.h> /* SSE 1 */ +#include <emmintrin.h> /* SSE 2 */ +#include <pmmintrin.h> /* SSE 3 */ +#include <tmmintrin.h> /* SSSE 3 */ +#include <smmintrin.h> /* SSE 4.1 */ -#endif*/ +#endif #ifndef _WIN32 #ifndef __KERNEL_GPU__ @@ -97,6 +111,12 @@ typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; +#ifdef __KERNEL_64_BIT__ +typedef int64_t ssize_t; +#else +typedef int32_t ssize_t; +#endif + #endif /* Generic Memory Pointer */ @@ -108,89 +128,137 @@ typedef uint64_t device_ptr; struct uchar2 { uchar x, y; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar3 { uchar x, y, z; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar4 { uchar x, y, z, w; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct int2 { int x, y; - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int3 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int3() {} + __forceinline int3(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return 
m128; } +#else struct int3 { - int x, y, z; + int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int4 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int4() {} + __forceinline int4(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return m128; } +#else struct int4 { int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; struct uint2 { uint x, y; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint3 { uint x, y, z; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint4 { uint x, y, z, w; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct float2 { float x, y; - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float3 { + 
union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float3() {} + __forceinline float3(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float3 { - float x, y, z; - -#ifdef WITH_OPENCL - float w; + float x, y, z, w; #endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float4 { + union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float4() {} + __forceinline float4(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float4 { float x, y, z, w; +#endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; #endif @@ -201,87 +269,179 @@ struct float4 { * * OpenCL does not support C++ class, so we use these instead. 
*/ -__device uchar2 make_uchar2(uchar x, uchar y) +__device_inline uchar2 make_uchar2(uchar x, uchar y) { uchar2 a = {x, y}; return a; } -__device uchar3 make_uchar3(uchar x, uchar y, uchar z) +__device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) { uchar3 a = {x, y, z}; return a; } -__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +__device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) { uchar4 a = {x, y, z, w}; return a; } -__device int2 make_int2(int x, int y) +__device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } -__device int3 make_int3(int x, int y, int z) +__device_inline int3 make_int3(int x, int y, int z) { - int3 a = {x, y, z}; +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set_epi32(0, z, y, x); +#else + int3 a = {x, y, z, 0}; +#endif + return a; } -__device int4 make_int4(int x, int y, int z, int w) +__device_inline int4 make_int4(int x, int y, int z, int w) { +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set_epi32(w, z, y, x); +#else int4 a = {x, y, z, w}; +#endif + return a; } -__device uint2 make_uint2(uint x, uint y) +__device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } -__device uint3 make_uint3(uint x, uint y, uint z) +__device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } -__device uint4 make_uint4(uint x, uint y, uint z, uint w) +__device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } -__device float2 make_float2(float x, float y) +__device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } -__device float3 make_float3(float x, float y, float z) +__device_inline float3 make_float3(float x, float y, float z) { -#ifdef WITH_OPENCL - float3 a = {x, y, z, 0.0f}; +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set_ps(0.0f, z, y, x); #else - float3 a = {x, y, z}; + float3 a = {x, y, z, 0.0f}; #endif + return a; } -__device float4 make_float4(float x, 
float y, float z, float w) +__device_inline float4 make_float4(float x, float y, float z, float w) { +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set_ps(w, z, y, x); +#else float4 a = {x, y, z, w}; +#endif + return a; } -__device int align_up(int offset, int alignment) +__device_inline int align_up(int offset, int alignment) { return (offset + alignment - 1) & ~(alignment - 1); } +__device_inline int3 make_int3(int i) +{ +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set1_epi32(i); +#else + int3 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline int4 make_int4(int i) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set1_epi32(i); +#else + int4 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline float3 make_float3(float f) +{ +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set1_ps(f); +#else + float3 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(float f) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set1_ps(f); +#else + float4 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(const int4& i) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_cvtepi32_ps(i.m128); +#else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +#endif + + return a; +} + +__device_inline int4 make_int4(const float3& f) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_cvtps_epi32(f.m128); +#else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +#endif + + return a; +} + #endif CCL_NAMESPACE_END |