diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh_build.cpp | 35 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh_build.h | 12 | ||||
-rw-r--r-- | intern/cycles/render/image_sky.cpp | 36 | ||||
-rw-r--r-- | intern/cycles/render/light.cpp | 30 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 69 | ||||
-rw-r--r-- | intern/cycles/render/svm.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_task.h | 7 |
10 files changed, 58 insertions, 138 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index fee428a3cf6..ac6f54cd5b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -654,6 +654,7 @@ if(WITH_BOOST AND NOT (WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_INTERNATIONAL OR set(WITH_BOOST OFF) endif() +set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF) set_and_warn_dependency(WITH_TBB WITH_USD OFF) set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF) set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF) diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 121c8bdad6e..e5a5e9773d3 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -286,6 +286,7 @@ include_directories( ${OPENEXR_INCLUDE_DIR} ${OPENEXR_INCLUDE_DIRS} ${PUGIXML_INCLUDE_DIR} + ${TBB_INCLUDE_DIRS} ) if(CYCLES_STANDALONE_REPOSITORY) diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index 814b5ced5d2..ad555535a17 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -423,22 +423,6 @@ BVHNode *BVHBuild::run() } spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha; - if (params.use_spatial_split) { - /* NOTE: The API here tries to be as much ready for multi-threaded build - * as possible, but at the same time it tries not to introduce any - * changes in behavior for until all refactoring needed for threading is - * finished. - * - * So we currently allocate single storage for now, which is only used by - * the only thread working on the spatial BVH build. 
- */ - spatial_storage.resize(TaskScheduler::num_threads() + 1); - size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1; - foreach (BVHSpatialStorage &storage, spatial_storage) { - storage.right_bounds.clear(); - } - spatial_storage[0].right_bounds.resize(num_bins); - } spatial_free_index = 0; need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0; @@ -475,6 +459,9 @@ BVHNode *BVHBuild::run() task_pool.wait_work(); } + /* clean up temporary memory usage by threads */ + spatial_storage.clear(); + /* delete if we canceled */ if (rootnode) { if (progress.get_cancel()) { @@ -551,19 +538,18 @@ void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning * } } -void BVHBuild::thread_build_spatial_split_node(InnerNode *inner, - int child, - BVHRange *range, - vector<BVHReference> *references, - int level, - int thread_id) +void BVHBuild::thread_build_spatial_split_node( + InnerNode *inner, int child, BVHRange *range, vector<BVHReference> *references, int level) { if (progress.get_cancel()) { return; } + /* Get per-thread memory for spatial split. */ + BVHSpatialStorage *local_storage = &spatial_storage.local(); + /* build nodes */ - BVHNode *node = build_node(*range, references, level, thread_id); + BVHNode *node = build_node(*range, references, level, local_storage); /* set child in inner node */ inner->children[child] = node; @@ -690,7 +676,7 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level) BVHNode *BVHBuild::build_node(const BVHRange &range, vector<BVHReference> *references, int level, - int thread_id) + BVHSpatialStorage *storage) { /* Update progress. * @@ -712,7 +698,6 @@ BVHNode *BVHBuild::build_node(const BVHRange &range, } /* Perform splitting test. 
*/ - BVHSpatialStorage *storage = &spatial_storage[thread_id]; BVHMixedSplit split(this, storage, range, references, level); if (!(range.size() > 0 && params.top_level && level == 0)) { diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index 3fe4c3799e2..df2aa2ae1a7 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -76,7 +76,7 @@ class BVHBuild { BVHNode *build_node(const BVHRange &range, vector<BVHReference> *references, int level, - int thread_id); + BVHSpatialStorage *storage); BVHNode *build_node(const BVHObjectBinning &range, int level); BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references); BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); @@ -87,12 +87,8 @@ class BVHBuild { /* Threads. */ enum { THREAD_TASK_SIZE = 4096 }; void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level); - void thread_build_spatial_split_node(InnerNode *node, - int child, - BVHRange *range, - vector<BVHReference> *references, - int level, - int thread_id); + void thread_build_spatial_split_node( + InnerNode *node, int child, BVHRange *range, vector<BVHReference> *references, int level); thread_mutex build_mutex; /* Progress. */ @@ -127,7 +123,7 @@ class BVHBuild { /* Spatial splitting. 
*/ float spatial_min_overlap; - vector<BVHSpatialStorage> spatial_storage; + enumerable_thread_specific<BVHSpatialStorage> spatial_storage; size_t spatial_free_index; thread_spin_lock spatial_spin_lock; diff --git a/intern/cycles/render/image_sky.cpp b/intern/cycles/render/image_sky.cpp index 3e7b491f609..442e1d7941f 100644 --- a/intern/cycles/render/image_sky.cpp +++ b/intern/cycles/render/image_sky.cpp @@ -20,6 +20,7 @@ #include "util/util_logging.h" #include "util/util_path.h" #include "util/util_sky_model.h" +#include "util/util_task.h" CCL_NAMESPACE_BEGIN @@ -58,26 +59,21 @@ bool SkyLoader::load_pixels(const ImageMetaData &metadata, float altitude_f = (float)altitude; /* precompute sky texture */ - const int num_chunks = TaskScheduler::num_threads(); - const int chunk_size = height / num_chunks; - TaskPool pool; - for (int chunk = 0; chunk < num_chunks; chunk++) { - const int chunk_start = chunk * chunk_size; - const int chunk_end = (chunk + 1 < num_chunks) ? (chunk + 1) * chunk_size : height; - pool.push(function_bind(&nishita_skymodel_precompute_texture, - pixel_data, - metadata.channels, - chunk_start, - chunk_end, - width, - height, - sun_elevation, - altitude_f, - air_density, - dust_density, - ozone_density)); - } - pool.wait_work(); + const int rows_per_task = divide_up(1024, width); + parallel_for(blocked_range<size_t>(0, height, rows_per_task), + [&](const blocked_range<size_t> &r) { + nishita_skymodel_precompute_texture(pixel_data, + metadata.channels, + r.begin(), + r.end(), + width, + height, + sun_elevation, + altitude_f, + air_density, + dust_density, + ozone_density); + }); return true; } diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 25c91a76d58..c0615c6217b 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -680,29 +680,13 @@ void LightManager::device_update_background(Device *device, float2 *cond_cdf = dscene->light_background_conditional_cdf.alloc(cdf_width * res.y); double 
time_start = time_dt(); - if (max(res.x, res.y) < 512) { - /* Small enough resolution, faster to do single-threaded. */ - background_cdf(0, res.y, res.x, res.y, &pixels, cond_cdf); - } - else { - /* Threaded evaluation for large resolution. */ - const int num_blocks = TaskScheduler::num_threads(); - const int chunk_size = res.y / num_blocks; - int start_row = 0; - TaskPool pool; - for (int i = 0; i < num_blocks; ++i) { - const int current_chunk_size = (i != num_blocks - 1) ? chunk_size : (res.y - i * chunk_size); - pool.push(function_bind(&background_cdf, - start_row, - start_row + current_chunk_size, - res.x, - res.y, - &pixels, - cond_cdf)); - start_row += current_chunk_size; - } - pool.wait_work(); - } + + /* Create CDF in parallel. */ + const int rows_per_task = divide_up(10240, res.x); + parallel_for(blocked_range<size_t>(0, res.y, rows_per_task), + [&](const blocked_range<size_t> &r) { + background_cdf(r.begin(), r.end(), res.x, res.y, &pixels, cond_cdf); + }); /* marginal CDFs (column, V direction, sum of rows) */ marg_cdf[0].x = cond_cdf[res.x].x; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 752350ad76e..28337ef1a21 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -78,7 +78,6 @@ struct UpdateObjectTransformState { Scene *scene; /* Some locks to keep everything thread-safe. */ - thread_spin_lock queue_lock; thread_spin_lock surface_area_lock; /* First unused object index in the queue. */ @@ -551,41 +550,6 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s } } -bool ObjectManager::device_update_object_transform_pop_work(UpdateObjectTransformState *state, - int *start_index, - int *num_objects) -{ - /* Tweakable parameter, number of objects per chunk. - * Too small value will cause some extra overhead due to spin lock, - * too big value might not use all threads nicely. 
- */ - static const int OBJECTS_PER_TASK = 32; - bool have_work = false; - state->queue_lock.lock(); - int num_scene_objects = state->scene->objects.size(); - if (state->queue_start_object < num_scene_objects) { - int count = min(OBJECTS_PER_TASK, num_scene_objects - state->queue_start_object); - *start_index = state->queue_start_object; - *num_objects = count; - state->queue_start_object += count; - have_work = true; - } - state->queue_lock.unlock(); - return have_work; -} - -void ObjectManager::device_update_object_transform_task(UpdateObjectTransformState *state) -{ - int start_index, num_objects; - while (device_update_object_transform_pop_work(state, &start_index, &num_objects)) { - for (int i = 0; i < num_objects; ++i) { - const int object_index = start_index + i; - Object *ob = state->scene->objects[object_index]; - device_update_object_transform(state, ob); - } - } -} - void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress) { UpdateObjectTransformState state; @@ -631,29 +595,16 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, numparticles += psys->particles.size(); } - /* NOTE: If it's just a handful of objects we deal with them in a single - * thread to avoid threading overhead. However, this threshold is might - * need some tweaks to make mid-complex scenes optimal. - */ - if (scene->objects.size() < 64) { - foreach (Object *ob, scene->objects) { - device_update_object_transform(&state, ob); - if (progress.get_cancel()) { - return; - } - } - } - else { - const int num_threads = TaskScheduler::num_threads(); - TaskPool pool; - for (int i = 0; i < num_threads; ++i) { - pool.push(function_bind(&ObjectManager::device_update_object_transform_task, this, &state)); - } - pool.wait_work(); - if (progress.get_cancel()) { - return; - } - } + /* Parallel object update, with grain size to avoid too much threading overhead + * for individual objects. 
*/ + static const int OBJECTS_PER_TASK = 32; + parallel_for(blocked_range<size_t>(0, scene->objects.size(), OBJECTS_PER_TASK), + [&](const blocked_range<size_t> &r) { + for (size_t i = r.begin(); i != r.end(); i++) { + Object *ob = state.scene->objects[i]; + device_update_object_transform(&state, ob); + } + }); dscene->objects.copy_to_device(); if (state.need_motion == Scene::MOTION_PASS) { diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index ea3dbaf8e03..88714e20a90 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -94,8 +94,7 @@ void SVMShaderManager::device_update(Device *device, scene, scene->shaders[i], &progress, - &shader_svm_nodes[i]), - false); + &shader_svm_nodes[i])); } task_pool.wait_work(); diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 16d47d57e69..2ba30cdb8af 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -29,7 +29,7 @@ set(SRC ) set(LIB - + ${TBB_LIBRARIES} ) if(WITH_CYCLES_STANDALONE) diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h index 17ff47cb2d8..dcb6733e9fa 100644 --- a/intern/cycles/util/util_task.h +++ b/intern/cycles/util/util_task.h @@ -22,8 +22,15 @@ #include "util/util_thread.h" #include "util/util_vector.h" +#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 +#include <tbb/tbb.h> + CCL_NAMESPACE_BEGIN +using tbb::blocked_range; +using tbb::enumerable_thread_specific; +using tbb::parallel_for; + class Task; class TaskPool; class TaskScheduler; |