diff options
author | Manuel Castilla <manzanillawork@gmail.com> | 2021-06-01 11:25:38 +0300 |
---|---|---|
committer | Manuel Castilla <manzanillawork@gmail.com> | 2021-06-01 11:51:53 +0300 |
commit | 9adfd278f7487798f1b0124c7e44cf9934b4ba54 (patch) | |
tree | 5061ed25bdace84562707533aeaf0f550510313f /source/blender/compositor/intern | |
parent | 930ad9257d00a1891a948ff71756ffe8acb61686 (diff) |
Compositor: Full-frame base system
This patch adds the base code needed for the full-frame system to work with both the current tiled/per-pixel implementations of operations and the full-frame ones.
Two execution models:
- Tiled: Current implementation. Renders execution groups in tiles from outputs to inputs. Not all operations are buffered. Runs the tiled/per-pixel implementation.
- FullFrame: All operations are buffered. Fully renders operations from inputs to outputs. Runs the full-frame implementation of an operation if available, otherwise the current tiled/per-pixel one. Creates output buffers on first read and frees them as soon as all their readers have finished, reducing peak memory usage of complex/long trees. Each operation is multi-threaded, but operations do not run in parallel with each other as they do in Tiled (will be done in another patch).
This should allow us to convert operations to full-frame in small steps with the system already working and solve the problem of high memory usage.
FullFrame breaking changes with respect to the Tiled system, mainly:
- Translate, Rotate, Scale, and Transform take effect immediately instead of at the next buffered operation.
- Any sampling is always done over inputs instead of over the last buffered operation.
Reviewed By: jbakker
Differential Revision: https://developer.blender.org/D11113
Diffstat (limited to 'source/blender/compositor/intern')
27 files changed, 1560 insertions, 155 deletions
diff --git a/source/blender/compositor/intern/COM_BufferOperation.cc b/source/blender/compositor/intern/COM_BufferOperation.cc new file mode 100644 index 00000000000..c1e64142443 --- /dev/null +++ b/source/blender/compositor/intern/COM_BufferOperation.cc @@ -0,0 +1,65 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#include "COM_BufferOperation.h" + +namespace blender::compositor { + +BufferOperation::BufferOperation(MemoryBuffer *buffer, DataType data_type) : NodeOperation() +{ + buffer_ = buffer; + /* TODO: Implement a MemoryBuffer get_size() method returning a Size2d type. Shorten following + * code to: set_resolution(buffer.get_size()) */ + unsigned int resolution[2]; + resolution[0] = buffer->getWidth(); + resolution[1] = buffer->getHeight(); + setResolution(resolution); + addOutputSocket(data_type); +} + +void *BufferOperation::initializeTileData(rcti * /*rect*/) +{ + return buffer_; +} + +void BufferOperation::executePixelSampled(float output[4], float x, float y, PixelSampler sampler) +{ + switch (sampler) { + case PixelSampler::Nearest: + buffer_->read(output, x, y); + break; + case PixelSampler::Bilinear: + default: + buffer_->readBilinear(output, x, y); + break; + case PixelSampler::Bicubic: + /* No bicubic. 
Same implementation as ReadBufferOperation. */ + buffer_->readBilinear(output, x, y); + break; + } +} + +void BufferOperation::executePixelFiltered( + float output[4], float x, float y, float dx[2], float dy[2]) +{ + const float uv[2] = {x, y}; + const float deriv[2][2] = {{dx[0], dx[1]}, {dy[0], dy[1]}}; + buffer_->readEWA(output, uv, deriv); +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_BufferOperation.h b/source/blender/compositor/intern/COM_BufferOperation.h new file mode 100644 index 00000000000..f87cd4db94e --- /dev/null +++ b/source/blender/compositor/intern/COM_BufferOperation.h @@ -0,0 +1,37 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#pragma once + +#include "COM_NodeOperation.h" + +namespace blender::compositor { + +class BufferOperation : public NodeOperation { + private: + MemoryBuffer *buffer_; + + public: + BufferOperation(MemoryBuffer *buffer, DataType data_type); + + void *initializeTileData(rcti *rect) override; + void executePixelSampled(float output[4], float x, float y, PixelSampler sampler) override; + void executePixelFiltered(float output[4], float x, float y, float dx[2], float dy[2]) override; +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_CPUDevice.cc b/source/blender/compositor/intern/COM_CPUDevice.cc index 29a82bec636..2ca5557e278 100644 --- a/source/blender/compositor/intern/COM_CPUDevice.cc +++ b/source/blender/compositor/intern/COM_CPUDevice.cc @@ -30,11 +30,24 @@ CPUDevice::CPUDevice(int thread_id) : m_thread_id(thread_id) void CPUDevice::execute(WorkPackage *work_package) { - const unsigned int chunkNumber = work_package->chunk_number; - ExecutionGroup *executionGroup = work_package->execution_group; - - executionGroup->getOutputOperation()->executeRegion(&work_package->rect, chunkNumber); - executionGroup->finalizeChunkExecution(chunkNumber, nullptr); + switch (work_package->type) { + case eWorkPackageType::Tile: { + const unsigned int chunkNumber = work_package->chunk_number; + ExecutionGroup *executionGroup = work_package->execution_group; + + executionGroup->getOutputOperation()->executeRegion(&work_package->rect, chunkNumber); + executionGroup->finalizeChunkExecution(chunkNumber, nullptr); + break; + } + case eWorkPackageType::CustomFunction: { + work_package->execute_fn(); + break; + } + } + + if (work_package->executed_fn) { + work_package->executed_fn(); + } } } // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_CompositorContext.cc b/source/blender/compositor/intern/COM_CompositorContext.cc index f70f3a8ebfc..61e299c045e 100644 --- 
a/source/blender/compositor/intern/COM_CompositorContext.cc +++ b/source/blender/compositor/intern/COM_CompositorContext.cc @@ -21,6 +21,7 @@ #include <cstdio> #include "BLI_assert.h" +#include "DNA_userdef_types.h" namespace blender::compositor { @@ -33,6 +34,7 @@ CompositorContext::CompositorContext() this->m_fastCalculation = false; this->m_viewSettings = nullptr; this->m_displaySettings = nullptr; + this->m_bnodetree = nullptr; } int CompositorContext::getFramenumber() const @@ -41,4 +43,20 @@ int CompositorContext::getFramenumber() const return m_rd->cfra; } +eExecutionModel CompositorContext::get_execution_model() const +{ + if (U.experimental.use_full_frame_compositor) { + BLI_assert(m_bnodetree != nullptr); + switch (m_bnodetree->execution_mode) { + case 1: + return eExecutionModel::FullFrame; + case 0: + return eExecutionModel::Tiled; + default: + BLI_assert(!"Invalid execution mode"); + } + } + return eExecutionModel::Tiled; +} + } // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_CompositorContext.h b/source/blender/compositor/intern/COM_CompositorContext.h index e6164246bdd..56251511576 100644 --- a/source/blender/compositor/intern/COM_CompositorContext.h +++ b/source/blender/compositor/intern/COM_CompositorContext.h @@ -281,6 +281,11 @@ class CompositorContext { { return m_rd->size * 0.01f; } + + /** + * Get active execution model. 
+ */ + eExecutionModel get_execution_model() const; }; } // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_Debug.cc b/source/blender/compositor/intern/COM_Debug.cc index dfb4f53fee5..4cf7e09a7d8 100644 --- a/source/blender/compositor/intern/COM_Debug.cc +++ b/source/blender/compositor/intern/COM_Debug.cc @@ -211,12 +211,14 @@ int DebugInfo::graphviz_legend_group( return len; } -int DebugInfo::graphviz_legend(char *str, int maxlen) +int DebugInfo::graphviz_legend(char *str, int maxlen, const bool has_execution_groups) { int len = 0; len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "{\r\n"); - len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "rank = sink;\r\n"); + if (has_execution_groups) { + len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "rank = sink;\r\n"); + } len += snprintf( str + len, maxlen > len ? maxlen - len : 0, "Legend [shape=none, margin=0, label=<\r\n"); @@ -236,21 +238,24 @@ int DebugInfo::graphviz_legend(char *str, int maxlen) "Viewer", "lightskyblue3", str + len, maxlen > len ? maxlen - len : 0); len += graphviz_legend_color( "Active Viewer", "lightskyblue1", str + len, maxlen > len ? maxlen - len : 0); - len += graphviz_legend_color( - "Write Buffer", "darkorange", str + len, maxlen > len ? maxlen - len : 0); - len += graphviz_legend_color( - "Read Buffer", "darkolivegreen3", str + len, maxlen > len ? maxlen - len : 0); + if (has_execution_groups) { + len += graphviz_legend_color( + "Write Buffer", "darkorange", str + len, maxlen > len ? maxlen - len : 0); + len += graphviz_legend_color( + "Read Buffer", "darkolivegreen3", str + len, maxlen > len ? maxlen - len : 0); + } len += graphviz_legend_color( "Input Value", "khaki1", str + len, maxlen > len ? maxlen - len : 0); - len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "<TR><TD></TD></TR>\r\n"); - - len += graphviz_legend_group( - "Group Waiting", "white", "dashed", str + len, maxlen > len ? 
maxlen - len : 0); - len += graphviz_legend_group( - "Group Running", "firebrick1", "solid", str + len, maxlen > len ? maxlen - len : 0); - len += graphviz_legend_group( - "Group Finished", "chartreuse4", "solid", str + len, maxlen > len ? maxlen - len : 0); + if (has_execution_groups) { + len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "<TR><TD></TD></TR>\r\n"); + len += graphviz_legend_group( + "Group Waiting", "white", "dashed", str + len, maxlen > len ? maxlen - len : 0); + len += graphviz_legend_group( + "Group Running", "firebrick1", "solid", str + len, maxlen > len ? maxlen - len : 0); + len += graphviz_legend_group( + "Group Finished", "chartreuse4", "solid", str + len, maxlen > len ? maxlen - len : 0); + } len += snprintf(str + len, maxlen > len ? maxlen - len : 0, "</TABLE>\r\n"); len += snprintf(str + len, maxlen > len ? maxlen - len : 0, ">];\r\n"); @@ -387,7 +392,9 @@ bool DebugInfo::graphviz_system(const ExecutionSystem *system, char *str, int ma } } - len += graphviz_legend(str + len, maxlen > len ? maxlen - len : 0); + const bool has_execution_groups = system->getContext().get_execution_model() == + eExecutionModel::Tiled; + len += graphviz_legend(str + len, maxlen > len ? maxlen - len : 0, has_execution_groups); len += snprintf(str + len, maxlen > len ? 
maxlen - len : 0, "}\r\n"); diff --git a/source/blender/compositor/intern/COM_Debug.h b/source/blender/compositor/intern/COM_Debug.h index e1aea69e481..0de3a5e39dc 100644 --- a/source/blender/compositor/intern/COM_Debug.h +++ b/source/blender/compositor/intern/COM_Debug.h @@ -129,7 +129,7 @@ class DebugInfo { const char *name, const char *color, const char *style, char *str, int maxlen); static int graphviz_legend_group( const char *name, const char *color, const char *style, char *str, int maxlen); - static int graphviz_legend(char *str, int maxlen); + static int graphviz_legend(char *str, int maxlen, bool has_execution_groups); static bool graphviz_system(const ExecutionSystem *system, char *str, int maxlen); }; diff --git a/source/blender/compositor/intern/COM_Enums.h b/source/blender/compositor/intern/COM_Enums.h index f65ce3e856e..519e7df940e 100644 --- a/source/blender/compositor/intern/COM_Enums.h +++ b/source/blender/compositor/intern/COM_Enums.h @@ -70,6 +70,21 @@ enum class eWorkPackageState { Executed = 2, }; +/** + * \brief Work type to execute. + * \ingroup Execution + */ +enum class eWorkPackageType { + /** + * \brief Executes an execution group tile. + */ + Tile = 0, + /** + * \brief Executes a custom function. 
+ */ + CustomFunction = 1 +}; + std::ostream &operator<<(std::ostream &os, const eCompositorPriority &priority); std::ostream &operator<<(std::ostream &os, const eWorkPackageState &execution_state); diff --git a/source/blender/compositor/intern/COM_ExecutionGroup.cc b/source/blender/compositor/intern/COM_ExecutionGroup.cc index 80d453bf7f9..68bda8c70d6 100644 --- a/source/blender/compositor/intern/COM_ExecutionGroup.cc +++ b/source/blender/compositor/intern/COM_ExecutionGroup.cc @@ -157,6 +157,7 @@ void ExecutionGroup::init_work_packages() if (this->m_chunks_len != 0) { m_work_packages.resize(this->m_chunks_len); for (unsigned int index = 0; index < m_chunks_len; index++) { + m_work_packages[index].type = eWorkPackageType::Tile; m_work_packages[index].state = eWorkPackageState::NotScheduled; m_work_packages[index].execution_group = this; m_work_packages[index].chunk_number = index; diff --git a/source/blender/compositor/intern/COM_ExecutionModel.cc b/source/blender/compositor/intern/COM_ExecutionModel.cc new file mode 100644 index 00000000000..4d7f62e091b --- /dev/null +++ b/source/blender/compositor/intern/COM_ExecutionModel.cc @@ -0,0 +1,48 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#include "COM_ExecutionModel.h" + +namespace blender::compositor { + +ExecutionModel::ExecutionModel(CompositorContext &context, Span<NodeOperation *> operations) + : context_(context), operations_(operations) +{ + const bNodeTree *node_tree = context_.getbNodeTree(); + + const rctf *viewer_border = &node_tree->viewer_border; + border_.use_viewer_border = (node_tree->flag & NTREE_VIEWER_BORDER) && + viewer_border->xmin < viewer_border->xmax && + viewer_border->ymin < viewer_border->ymax; + border_.viewer_border = viewer_border; + + const RenderData *rd = context_.getRenderData(); + /* Case when cropping to render border happens is handled in + * compositor output and render layer nodes. */ + border_.use_render_border = context.isRendering() && (rd->mode & R_BORDER) && + !(rd->mode & R_CROP); + border_.render_border = &rd->border; +} + +bool ExecutionModel::is_breaked() const +{ + const bNodeTree *btree = context_.getbNodeTree(); + return btree->test_break(btree->tbh); +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_ExecutionModel.h b/source/blender/compositor/intern/COM_ExecutionModel.h new file mode 100644 index 00000000000..9e8466b9282 --- /dev/null +++ b/source/blender/compositor/intern/COM_ExecutionModel.h @@ -0,0 +1,84 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#include "BLI_rect.h" +#include "BLI_vector.hh" + +#include "COM_ExecutionSystem.h" + +#include <functional> + +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif + +namespace blender::compositor { + +class NodeOperation; + +/** + * Base class for execution models. Contains shared implementation. + */ +class ExecutionModel { + protected: + /** + * Render and viewer border info. Coordinates are normalized. + */ + struct { + bool use_render_border; + const rctf *render_border; + bool use_viewer_border; + const rctf *viewer_border; + } border_; + + /** + * Context used during execution. + */ + CompositorContext &context_; + + /** + * All operations being executed. + */ + Span<NodeOperation *> operations_; + + public: + ExecutionModel(CompositorContext &context, Span<NodeOperation *> operations); + + virtual ~ExecutionModel() + { + } + + virtual void execute(ExecutionSystem &exec_system) = 0; + + virtual void execute_work(const rcti &UNUSED(work_rect), + std::function<void(const rcti &split_rect)> UNUSED(work_func)) + { + BLI_assert(!"Method not supported by current execution model"); + } + + protected: + bool is_breaked() const; + +#ifdef WITH_CXX_GUARDEDALLOC + MEM_CXX_CLASS_ALLOC_FUNCS("COM:BaseExecutionModel") +#endif +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_ExecutionSystem.cc b/source/blender/compositor/intern/COM_ExecutionSystem.cc index e22dc17837b..a12ec774032 100644 --- a/source/blender/compositor/intern/COM_ExecutionSystem.cc +++ b/source/blender/compositor/intern/COM_ExecutionSystem.cc @@ -21,16 +21,11 @@ #include "BLI_utildefines.h" #include "PIL_time.h" -#include "BKE_node.h" - -#include 
"BLT_translation.h" - -#include "COM_Converter.h" #include "COM_Debug.h" -#include "COM_ExecutionGroup.h" +#include "COM_FullFrameExecutionModel.h" #include "COM_NodeOperation.h" #include "COM_NodeOperationBuilder.h" -#include "COM_ReadBufferOperation.h" +#include "COM_TiledExecutionModel.h" #include "COM_WorkScheduler.h" #ifdef WITH_CXX_GUARDEDALLOC @@ -73,41 +68,23 @@ ExecutionSystem::ExecutionSystem(RenderData *rd, builder.convertToOperations(this); } - unsigned int resolution[2]; - - rctf *viewer_border = &editingtree->viewer_border; - bool use_viewer_border = (editingtree->flag & NTREE_VIEWER_BORDER) && - viewer_border->xmin < viewer_border->xmax && - viewer_border->ymin < viewer_border->ymax; - - editingtree->stats_draw(editingtree->sdh, TIP_("Compositing | Determining resolution")); - - for (ExecutionGroup *executionGroup : m_groups) { - resolution[0] = 0; - resolution[1] = 0; - executionGroup->determineResolution(resolution); - - if (rendering) { - /* case when cropping to render border happens is handled in - * compositor output and render layer nodes - */ - if ((rd->mode & R_BORDER) && !(rd->mode & R_CROP)) { - executionGroup->setRenderBorder( - rd->border.xmin, rd->border.xmax, rd->border.ymin, rd->border.ymax); - } - } - - if (use_viewer_border) { - executionGroup->setViewerBorder( - viewer_border->xmin, viewer_border->xmax, viewer_border->ymin, viewer_border->ymax); - } + switch (m_context.get_execution_model()) { + case eExecutionModel::Tiled: + execution_model_ = new TiledExecutionModel(m_context, m_operations, m_groups); + break; + case eExecutionModel::FullFrame: + execution_model_ = new FullFrameExecutionModel(m_context, active_buffers_, m_operations); + break; + default: + BLI_assert(!"Non implemented execution model"); + break; } - - // DebugInfo::graphviz(this); } ExecutionSystem::~ExecutionSystem() { + delete execution_model_; + for (NodeOperation *operation : m_operations) { delete operation; } @@ -126,100 +103,16 @@ void 
ExecutionSystem::set_operations(const Vector<NodeOperation *> &operations, m_groups = groups; } -static void update_read_buffer_offset(Vector<NodeOperation *> &operations) -{ - unsigned int order = 0; - for (NodeOperation *operation : operations) { - if (operation->get_flags().is_read_buffer_operation) { - ReadBufferOperation *readOperation = (ReadBufferOperation *)operation; - readOperation->setOffset(order); - order++; - } - } -} - -static void init_write_operations_for_execution(Vector<NodeOperation *> &operations, - const bNodeTree *bTree) -{ - for (NodeOperation *operation : operations) { - if (operation->get_flags().is_write_buffer_operation) { - operation->setbNodeTree(bTree); - operation->initExecution(); - } - } -} - -static void link_write_buffers(Vector<NodeOperation *> &operations) -{ - for (NodeOperation *operation : operations) { - if (operation->get_flags().is_read_buffer_operation) { - ReadBufferOperation *readOperation = static_cast<ReadBufferOperation *>(operation); - readOperation->updateMemoryBuffer(); - } - } -} - -static void init_non_write_operations_for_execution(Vector<NodeOperation *> &operations, - const bNodeTree *bTree) -{ - for (NodeOperation *operation : operations) { - if (!operation->get_flags().is_write_buffer_operation) { - operation->setbNodeTree(bTree); - operation->initExecution(); - } - } -} - -static void init_execution_groups_for_execution(Vector<ExecutionGroup *> &groups, - const int chunk_size) -{ - for (ExecutionGroup *execution_group : groups) { - execution_group->setChunksize(chunk_size); - execution_group->initExecution(); - } -} - void ExecutionSystem::execute() { - const bNodeTree *editingtree = this->m_context.getbNodeTree(); - editingtree->stats_draw(editingtree->sdh, TIP_("Compositing | Initializing execution")); - DebugInfo::execute_started(this); - update_read_buffer_offset(m_operations); - - init_write_operations_for_execution(m_operations, m_context.getbNodeTree()); - link_write_buffers(m_operations); - 
init_non_write_operations_for_execution(m_operations, m_context.getbNodeTree()); - init_execution_groups_for_execution(m_groups, m_context.getChunksize()); - - WorkScheduler::start(this->m_context); - execute_groups(eCompositorPriority::High); - if (!this->getContext().isFastCalculation()) { - execute_groups(eCompositorPriority::Medium); - execute_groups(eCompositorPriority::Low); - } - WorkScheduler::finish(); - WorkScheduler::stop(); - - editingtree->stats_draw(editingtree->sdh, TIP_("Compositing | De-initializing execution")); - - for (NodeOperation *operation : m_operations) { - operation->deinitExecution(); - } - - for (ExecutionGroup *execution_group : m_groups) { - execution_group->deinitExecution(); - } + execution_model_->execute(*this); } -void ExecutionSystem::execute_groups(eCompositorPriority priority) +void ExecutionSystem::execute_work(const rcti &work_rect, + std::function<void(const rcti &split_rect)> work_func) { - for (ExecutionGroup *execution_group : m_groups) { - if (execution_group->get_flags().is_output && - execution_group->getRenderPriority() == priority) { - execution_group->execute(this); - } - } + execution_model_->execute_work(work_rect, work_func); } } // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_ExecutionSystem.h b/source/blender/compositor/intern/COM_ExecutionSystem.h index e6170c48778..e106209651c 100644 --- a/source/blender/compositor/intern/COM_ExecutionSystem.h +++ b/source/blender/compositor/intern/COM_ExecutionSystem.h @@ -25,6 +25,7 @@ class ExecutionGroup; #include "COM_ExecutionGroup.h" #include "COM_Node.h" #include "COM_NodeOperation.h" +#include "COM_SharedOperationBuffers.h" #include "DNA_color_types.h" #include "DNA_node_types.h" @@ -115,13 +116,21 @@ namespace blender::compositor { * \see ExecutionGroup class representing the ExecutionGroup */ +/* Forward declarations. */ +class ExecutionModel; + /** * \brief the ExecutionSystem contains the whole compositor tree. 
*/ class ExecutionSystem { - private: /** + * Contains operations active buffers data. Buffers will be disposed once reader operations are + * finished. + */ + SharedOperationBuffers active_buffers_; + + /** * \brief the context used during execution */ CompositorContext m_context; @@ -136,6 +145,11 @@ class ExecutionSystem { */ Vector<ExecutionGroup *> m_groups; + /** + * Active execution model implementation. + */ + ExecutionModel *execution_model_; + private: // methods public: /** @@ -178,9 +192,14 @@ class ExecutionSystem { return this->m_context; } - private: - void execute_groups(eCompositorPriority priority); + SharedOperationBuffers &get_active_buffers() + { + return active_buffers_; + } + + void execute_work(const rcti &work_rect, std::function<void(const rcti &split_rect)> work_func); + private: /* allow the DebugInfo class to look at internals */ friend class DebugInfo; diff --git a/source/blender/compositor/intern/COM_FullFrameExecutionModel.cc b/source/blender/compositor/intern/COM_FullFrameExecutionModel.cc new file mode 100644 index 00000000000..1099aadd89d --- /dev/null +++ b/source/blender/compositor/intern/COM_FullFrameExecutionModel.cc @@ -0,0 +1,328 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#include "COM_FullFrameExecutionModel.h" +#include "COM_Debug.h" +#include "COM_ExecutionGroup.h" +#include "COM_ReadBufferOperation.h" +#include "COM_WorkScheduler.h" + +#include "BLT_translation.h" + +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif + +namespace blender::compositor { + +FullFrameExecutionModel::FullFrameExecutionModel(CompositorContext &context, + SharedOperationBuffers &shared_buffers, + Span<NodeOperation *> operations) + : ExecutionModel(context, operations), + active_buffers_(shared_buffers), + num_operations_finished_(0), + priorities_(), + work_mutex_(), + work_finished_cond_() +{ + priorities_.append(eCompositorPriority::High); + if (!context.isFastCalculation()) { + priorities_.append(eCompositorPriority::Medium); + priorities_.append(eCompositorPriority::Low); + } + + BLI_mutex_init(&work_mutex_); + BLI_condition_init(&work_finished_cond_); +} + +FullFrameExecutionModel::~FullFrameExecutionModel() +{ + BLI_condition_end(&work_finished_cond_); + BLI_mutex_end(&work_mutex_); +} + +void FullFrameExecutionModel::execute(ExecutionSystem &exec_system) +{ + const bNodeTree *node_tree = this->context_.getbNodeTree(); + node_tree->stats_draw(node_tree->sdh, TIP_("Compositing | Initializing execution")); + + DebugInfo::graphviz(&exec_system); + + determine_areas_to_render_and_reads(); + render_operations(exec_system); +} + +void FullFrameExecutionModel::determine_areas_to_render_and_reads() +{ + const bool is_rendering = context_.isRendering(); + const bNodeTree *node_tree = context_.getbNodeTree(); + + rcti area; + for (eCompositorPriority priority : priorities_) { + for (NodeOperation *op : operations_) { + op->setbNodeTree(node_tree); + if (op->isOutputOperation(is_rendering) && op->getRenderPriority() == priority) { + get_output_render_area(op, area); + determine_areas_to_render(op, area); + determine_reads(op); + } + } + } +} + +void FullFrameExecutionModel::ensure_inputs_rendered(NodeOperation *op, + 
ExecutionSystem &exec_system) +{ + const int num_inputs = op->getNumberOfInputSockets(); + for (int i = 0; i < num_inputs; i++) { + NodeOperation *input_op = op->get_input_operation(i); + if (!active_buffers_.is_operation_rendered(input_op)) { + render_operation(input_op, exec_system); + } + } +} + +Vector<MemoryBuffer *> FullFrameExecutionModel::get_input_buffers(NodeOperation *op) +{ + const int num_inputs = op->getNumberOfInputSockets(); + Vector<MemoryBuffer *> inputs_buffers(num_inputs); + for (int i = 0; i < num_inputs; i++) { + NodeOperation *input_op = op->get_input_operation(i); + inputs_buffers[i] = active_buffers_.get_rendered_buffer(input_op); + } + return inputs_buffers; +} + +MemoryBuffer *FullFrameExecutionModel::create_operation_buffer(NodeOperation *op) +{ + rcti op_rect; + BLI_rcti_init(&op_rect, 0, op->getWidth(), 0, op->getHeight()); + + const DataType data_type = op->getOutputSocket(0)->getDataType(); + /* TODO: We should check if the operation is constant instead of is_set_operation. Finding a way + * to know if an operation is constant has to be implemented yet. */ + const bool is_a_single_elem = op->get_flags().is_set_operation; + return new MemoryBuffer(data_type, op_rect, is_a_single_elem); +} + +void FullFrameExecutionModel::render_operation(NodeOperation *op, ExecutionSystem &exec_system) +{ + if (active_buffers_.is_operation_rendered(op)) { + return; + } + + ensure_inputs_rendered(op, exec_system); + Vector<MemoryBuffer *> input_bufs = get_input_buffers(op); + + const bool has_outputs = op->getNumberOfOutputSockets() > 0; + MemoryBuffer *op_buf = has_outputs ? create_operation_buffer(op) : nullptr; + Span<rcti> areas = active_buffers_.get_areas_to_render(op); + op->render(op_buf, areas, input_bufs, exec_system); + active_buffers_.set_rendered_buffer(op, std::unique_ptr<MemoryBuffer>(op_buf)); + + operation_finished(op); +} + +/** + * Render output operations in order of priority. 
+ */ +void FullFrameExecutionModel::render_operations(ExecutionSystem &exec_system) +{ + const bool is_rendering = context_.isRendering(); + + WorkScheduler::start(this->context_); + for (eCompositorPriority priority : priorities_) { + for (NodeOperation *op : operations_) { + if (op->isOutputOperation(is_rendering) && op->getRenderPriority() == priority) { + render_operation(op, exec_system); + } + } + } + WorkScheduler::stop(); +} + +/** + * Determines all input operations areas needed to render given operation area. + * \param operation: Renderer operation. + * \param render_area: Area within given operation bounds to render. + */ +void FullFrameExecutionModel::determine_areas_to_render(NodeOperation *operation, + const rcti &render_area) +{ + if (active_buffers_.is_area_registered(operation, render_area)) { + return; + } + + active_buffers_.register_area(operation, render_area); + + const int num_inputs = operation->getNumberOfInputSockets(); + for (int i = 0; i < num_inputs; i++) { + NodeOperation *input_op = operation->get_input_operation(i); + rcti input_op_rect, input_area; + BLI_rcti_init(&input_op_rect, 0, input_op->getWidth(), 0, input_op->getHeight()); + operation->get_area_of_interest(input_op, render_area, input_area); + + /* Ensure area of interest is within operation bounds, cropping areas outside. */ + BLI_rcti_isect(&input_area, &input_op_rect, &input_area); + + determine_areas_to_render(input_op, input_area); + } +} + +/** + * Determines the reads given operation and its inputs will receive (i.e: Number of dependent + * operations each operation has). 
+ */ +void FullFrameExecutionModel::determine_reads(NodeOperation *operation) +{ + if (active_buffers_.has_registered_reads(operation)) { + return; + } + + const int num_inputs = operation->getNumberOfInputSockets(); + for (int i = 0; i < num_inputs; i++) { + NodeOperation *input_op = operation->get_input_operation(i); + determine_reads(input_op); + active_buffers_.register_read(input_op); + } +} + +/** + * Calculates given output operation area to be rendered taking into account viewer and render + * borders. + */ +void FullFrameExecutionModel::get_output_render_area(NodeOperation *output_op, rcti &r_area) +{ + BLI_assert(output_op->isOutputOperation(context_.isRendering())); + + /* By default return operation bounds (no border). */ + const int op_width = output_op->getWidth(); + const int op_height = output_op->getHeight(); + BLI_rcti_init(&r_area, 0, op_width, 0, op_height); + + const bool has_viewer_border = border_.use_viewer_border && + (output_op->get_flags().is_viewer_operation || + output_op->get_flags().is_preview_operation); + const bool has_render_border = border_.use_render_border; + if (has_viewer_border || has_render_border) { + /* Get border with normalized coordinates. */ + const rctf *norm_border = has_viewer_border ? border_.viewer_border : border_.render_border; + + /* Return de-normalized border. */ + BLI_rcti_init(&r_area, + norm_border->xmin * op_width, + norm_border->xmax * op_width, + norm_border->ymin * op_height, + norm_border->ymax * op_height); + } +} + +/** + * Multi-threadedly execute given work function passing work_rect splits as argument. + */ +void FullFrameExecutionModel::execute_work(const rcti &work_rect, + std::function<void(const rcti &split_rect)> work_func) +{ + if (is_breaked()) { + return; + } + + /* Split work vertically to maximize continuous memory. 
*/ + const int work_height = BLI_rcti_size_y(&work_rect); + const int num_sub_works = MIN2(WorkScheduler::get_num_cpu_threads(), work_height); + const int split_height = num_sub_works == 0 ? 0 : work_height / num_sub_works; + int remaining_height = work_height - split_height * num_sub_works; + + Vector<WorkPackage> sub_works(num_sub_works); + int sub_work_y = work_rect.ymin; + int num_sub_works_finished = 0; + for (int i = 0; i < num_sub_works; i++) { + int sub_work_height = split_height; + + /* Distribute remaining height between sub-works. */ + if (remaining_height > 0) { + sub_work_height++; + remaining_height--; + } + + WorkPackage &sub_work = sub_works[i]; + sub_work.type = eWorkPackageType::CustomFunction; + sub_work.execute_fn = [=, &work_func, &work_rect]() { + if (is_breaked()) { + return; + } + rcti split_rect; + BLI_rcti_init( + &split_rect, work_rect.xmin, work_rect.xmax, sub_work_y, sub_work_y + sub_work_height); + work_func(split_rect); + }; + sub_work.executed_fn = [&]() { + BLI_mutex_lock(&work_mutex_); + num_sub_works_finished++; + if (num_sub_works_finished == num_sub_works) { + BLI_condition_notify_one(&work_finished_cond_); + } + BLI_mutex_unlock(&work_mutex_); + }; + WorkScheduler::schedule(&sub_work); + sub_work_y += sub_work_height; + } + BLI_assert(sub_work_y == work_rect.ymax); + + WorkScheduler::finish(); + + /* Ensure all sub-works finished (re-check in a loop to guard against spurious wake-ups). + * TODO: This a workaround for WorkScheduler::finish() not waiting all works on queue threading + * model. Sync code should be removed once it's fixed. */ + BLI_mutex_lock(&work_mutex_); + while (num_sub_works_finished < num_sub_works) { + BLI_condition_wait(&work_finished_cond_, &work_mutex_); + } + BLI_mutex_unlock(&work_mutex_); +} + +void FullFrameExecutionModel::operation_finished(NodeOperation *operation) +{ + /* Report inputs reads so that buffers may be freed/reused. 
*/ + const int num_inputs = operation->getNumberOfInputSockets(); + for (int i = 0; i < num_inputs; i++) { + active_buffers_.read_finished(operation->get_input_operation(i)); + } + + num_operations_finished_++; + update_progress_bar(); +} + +void FullFrameExecutionModel::update_progress_bar() +{ + const bNodeTree *tree = context_.getbNodeTree(); + if (tree) { + const float progress = num_operations_finished_ / static_cast<float>(operations_.size()); + tree->progress(tree->prh, progress); + + char buf[128]; + BLI_snprintf(buf, + sizeof(buf), + TIP_("Compositing | Operation %i-%li"), + num_operations_finished_ + 1, + operations_.size()); + tree->stats_draw(tree->sdh, buf); + } +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_FullFrameExecutionModel.h b/source/blender/compositor/intern/COM_FullFrameExecutionModel.h new file mode 100644 index 00000000000..2c0d5e0460a --- /dev/null +++ b/source/blender/compositor/intern/COM_FullFrameExecutionModel.h @@ -0,0 +1,89 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#include "COM_ExecutionModel.h" + +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif + +namespace blender::compositor { + +/* Forward declarations. 
*/ +class ExecutionGroup; + +/** + * Fully renders operations in order from inputs to outputs. + */ +class FullFrameExecutionModel : public ExecutionModel { + private: + /** + * Contains operations active buffers data. Buffers will be disposed once reader operations are + * finished. + */ + SharedOperationBuffers &active_buffers_; + + /** + * Number of operations finished. + */ + int num_operations_finished_; + + /** + * Order of priorities for output operations execution. + */ + Vector<eCompositorPriority> priorities_; + + ThreadMutex work_mutex_; + ThreadCondition work_finished_cond_; + + public: + FullFrameExecutionModel(CompositorContext &context, + SharedOperationBuffers &shared_buffers, + Span<NodeOperation *> operations); + ~FullFrameExecutionModel(); + + void execute(ExecutionSystem &exec_system) override; + + void execute_work(const rcti &work_rect, + std::function<void(const rcti &split_rect)> work_func) override; + + private: + void determine_areas_to_render_and_reads(); + void render_operations(ExecutionSystem &exec_system); + + void ensure_inputs_rendered(NodeOperation *op, ExecutionSystem &exec_system); + Vector<MemoryBuffer *> get_input_buffers(NodeOperation *op); + MemoryBuffer *create_operation_buffer(NodeOperation *op); + void render_operation(NodeOperation *op, ExecutionSystem &exec_system); + + void operation_finished(NodeOperation *operation); + + void get_output_render_area(NodeOperation *output_op, rcti &r_area); + void determine_areas_to_render(NodeOperation *operation, const rcti &render_area); + void determine_reads(NodeOperation *operation); + + void update_progress_bar(); + +#ifdef WITH_CXX_GUARDEDALLOC + MEM_CXX_CLASS_ALLOC_FUNCS("COM:FullFrameExecutionModel") +#endif +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_MultiThreadedOperation.cc b/source/blender/compositor/intern/COM_MultiThreadedOperation.cc new file mode 100644 index 00000000000..c54c2edccb0 --- /dev/null +++ 
b/source/blender/compositor/intern/COM_MultiThreadedOperation.cc @@ -0,0 +1,26 @@ +#include "COM_MultiThreadedOperation.h" +#include "COM_ExecutionSystem.h" + +namespace blender::compositor { + +MultiThreadedOperation::MultiThreadedOperation() +{ + m_num_passes = 1; + flags.is_fullframe_operation = true; +} + +void MultiThreadedOperation::update_memory_buffer(MemoryBuffer *output, + const rcti &output_area, + blender::Span<MemoryBuffer *> inputs, + ExecutionSystem &exec_system) +{ + for (int current_pass = 0; current_pass < m_num_passes; current_pass++) { + update_memory_buffer_started(output, output_area, inputs, exec_system, current_pass); + exec_system.execute_work(output_area, [=, &exec_system](const rcti &split_rect) { + update_memory_buffer_partial(output, split_rect, inputs, exec_system, current_pass); + }); + update_memory_buffer_finished(output, output_area, inputs, exec_system, current_pass); + } +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_MultiThreadedOperation.h b/source/blender/compositor/intern/COM_MultiThreadedOperation.h new file mode 100644 index 00000000000..e86b1d303f9 --- /dev/null +++ b/source/blender/compositor/intern/COM_MultiThreadedOperation.h @@ -0,0 +1,73 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#include "COM_NodeOperation.h" + +namespace blender::compositor { + +class MultiThreadedOperation : public NodeOperation { + protected: + /** + * Number of execution passes. + */ + int m_num_passes; + + protected: + MultiThreadedOperation(); + + /** + * Called before an update memory buffer pass is executed. Single-threaded calls. + */ + virtual void update_memory_buffer_started(MemoryBuffer *UNUSED(output), + const rcti &UNUSED(output_rect), + blender::Span<MemoryBuffer *> UNUSED(inputs), + ExecutionSystem &UNUSED(exec_system), + int UNUSED(current_pass)) + { + } + + /** + * Executes operation updating output memory buffer on output_rect area. Multi-threaded calls. + */ + virtual void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &output_rect, + blender::Span<MemoryBuffer *> inputs, + ExecutionSystem &exec_system, + int current_pass) = 0; + + /** + * Called after an update memory buffer pass is executed. Single-threaded calls. 
+ */ + virtual void update_memory_buffer_finished(MemoryBuffer *UNUSED(output), + const rcti &UNUSED(output_rect), + blender::Span<MemoryBuffer *> UNUSED(inputs), + ExecutionSystem &UNUSED(exec_system), + int UNUSED(current_pass)) + { + } + + private: + void update_memory_buffer(MemoryBuffer *output, + const rcti &output_rect, + blender::Span<MemoryBuffer *> inputs, + ExecutionSystem &exec_system) override; +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_NodeOperation.cc b/source/blender/compositor/intern/COM_NodeOperation.cc index be3ea59efa5..83de8a751c4 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.cc +++ b/source/blender/compositor/intern/COM_NodeOperation.cc @@ -17,8 +17,10 @@ */ #include <cstdio> +#include <memory> #include <typeinfo> +#include "COM_BufferOperation.h" #include "COM_ExecutionSystem.h" #include "COM_ReadBufferOperation.h" #include "COM_defines.h" @@ -175,6 +177,177 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti *input, return !first; } +/* -------------------------------------------------------------------- */ +/** \name Full Frame Methods + * \{ */ + +/** + * \brief Get input operation area being read by this operation on rendering given output area. + * + * Implementation don't need to ensure r_input_area is within input operation bounds. The + * caller must clamp it. + * TODO: See if it's possible to use parameter overloading (input_id for example). + * + * \param input_op_idx: Input operation index for which we want to calculate the area being read. + * \param output_area: Area being rendered by this operation. + * \param r_input_area: Returned input operation area that needs to be read in order to render + * given output area. 
+ */ +void NodeOperation::get_area_of_interest(const int input_op_idx, + const rcti &output_area, + rcti &r_input_area) +{ + if (get_flags().is_fullframe_operation) { + r_input_area = output_area; + } + else { + /* Non full-frame operations never implement this method. To ensure correctness assume + * whole area is used. */ + NodeOperation *input_op = getInputOperation(input_op_idx); + BLI_rcti_init(&r_input_area, 0, input_op->getWidth(), 0, input_op->getHeight()); + } +} + +void NodeOperation::get_area_of_interest(NodeOperation *input_op, + const rcti &output_area, + rcti &r_input_area) +{ + for (int i = 0; i < getNumberOfInputSockets(); i++) { + if (input_op == getInputOperation(i)) { + get_area_of_interest(i, output_area, r_input_area); + return; + } + } + BLI_assert(!"input_op is not an input operation."); +} + +/** + * Executes operation image manipulation algorithm rendering given areas. + * \param output_buf: Buffer to write result to. + * \param areas: Areas within this operation bounds to render. + * \param inputs_bufs: Inputs operations buffers. + * \param exec_system: Execution system. + */ +void NodeOperation::render(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs_bufs, + ExecutionSystem &exec_system) +{ + if (get_flags().is_fullframe_operation) { + render_full_frame(output_buf, areas, inputs_bufs, exec_system); + } + else { + render_full_frame_fallback(output_buf, areas, inputs_bufs, exec_system); + } +} + +/** + * Renders given areas using operations full frame implementation. + */ +void NodeOperation::render_full_frame(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs_bufs, + ExecutionSystem &exec_system) +{ + initExecution(); + for (const rcti &area : areas) { + update_memory_buffer(output_buf, area, inputs_bufs, exec_system); + } + deinitExecution(); +} + +/** + * Renders given areas using operations tiled implementation. 
+ */ +void NodeOperation::render_full_frame_fallback(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs_bufs, + ExecutionSystem &exec_system) +{ + Vector<NodeOperationOutput *> orig_input_links = replace_inputs_with_buffers(inputs_bufs); + + initExecution(); + const bool is_output_operation = getNumberOfOutputSockets() == 0; + if (!is_output_operation && output_buf->is_a_single_elem()) { + float *output_elem = output_buf->get_elem(0, 0); + readSampled(output_elem, 0, 0, PixelSampler::Nearest); + } + else { + for (const rcti &rect : areas) { + exec_system.execute_work(rect, [=](const rcti &split_rect) { + rcti tile_rect = split_rect; + if (is_output_operation) { + executeRegion(&tile_rect, 0); + } + else { + render_tile(output_buf, &tile_rect); + } + }); + } + } + deinitExecution(); + + remove_buffers_and_restore_original_inputs(orig_input_links); +} + +void NodeOperation::render_tile(MemoryBuffer *output_buf, rcti *tile_rect) +{ + const bool is_complex = get_flags().complex; + void *tile_data = is_complex ? initializeTileData(tile_rect) : nullptr; + const int elem_stride = output_buf->elem_stride; + for (int y = tile_rect->ymin; y < tile_rect->ymax; y++) { + float *output_elem = output_buf->get_elem(tile_rect->xmin, y); + if (is_complex) { + for (int x = tile_rect->xmin; x < tile_rect->xmax; x++) { + read(output_elem, x, y, tile_data); + output_elem += elem_stride; + } + } + else { + for (int x = tile_rect->xmin; x < tile_rect->xmax; x++) { + readSampled(output_elem, x, y, PixelSampler::Nearest); + output_elem += elem_stride; + } + } + } + if (tile_data) { + deinitializeTileData(tile_rect, tile_data); + } +} + +/** + * \return Replaced inputs links. 
+ */ +Vector<NodeOperationOutput *> NodeOperation::replace_inputs_with_buffers( + Span<MemoryBuffer *> inputs_bufs) +{ + BLI_assert(inputs_bufs.size() == getNumberOfInputSockets()); + Vector<NodeOperationOutput *> orig_links(inputs_bufs.size()); + for (int i = 0; i < inputs_bufs.size(); i++) { + NodeOperationInput *input_socket = getInputSocket(i); + BufferOperation *buffer_op = new BufferOperation(inputs_bufs[i], input_socket->getDataType()); + orig_links[i] = input_socket->getLink(); + input_socket->setLink(buffer_op->getOutputSocket()); + } + return orig_links; +} + +void NodeOperation::remove_buffers_and_restore_original_inputs( + Span<NodeOperationOutput *> original_inputs_links) +{ + BLI_assert(original_inputs_links.size() == getNumberOfInputSockets()); + for (int i = 0; i < original_inputs_links.size(); i++) { + NodeOperation *buffer_op = get_input_operation(i); + BLI_assert(buffer_op != nullptr); + BLI_assert(typeid(*buffer_op) == typeid(BufferOperation)); + NodeOperationInput *input_socket = getInputSocket(i); + input_socket->setLink(original_inputs_links[i]); + delete buffer_op; + } +} + +/** \} */ + /***************** **** OpInput **** *****************/ @@ -267,6 +440,9 @@ std::ostream &operator<<(std::ostream &os, const NodeOperationFlags &node_operat if (!node_operation_flags.use_datatype_conversion) { os << "no_conversion,"; } + if (node_operation_flags.is_fullframe_operation) { + os << "full_frame,"; + } return os; } diff --git a/source/blender/compositor/intern/COM_NodeOperation.h b/source/blender/compositor/intern/COM_NodeOperation.h index baf3a0878b9..01068c7f812 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.h +++ b/source/blender/compositor/intern/COM_NodeOperation.h @@ -39,6 +39,7 @@ namespace blender::compositor { class OpenCLDevice; class ReadBufferOperation; class WriteBufferOperation; +class ExecutionSystem; class NodeOperation; typedef NodeOperation SocketReader; @@ -190,6 +191,10 @@ struct NodeOperationFlags { */ bool 
open_cl : 1; + /** + * TODO: Remove this flag and SingleThreadedOperation if tiled implemention is removed. + * Full-frame implemention doesn't need it. + */ bool single_threaded : 1; /** @@ -232,6 +237,11 @@ struct NodeOperationFlags { */ bool use_datatype_conversion : 1; + /** + * Has this operation fullframe implementation. + */ + bool is_fullframe_operation : 1; + NodeOperationFlags() { complex = false; @@ -247,6 +257,7 @@ struct NodeOperationFlags { is_viewer_operation = false; is_preview_operation = false; use_datatype_conversion = true; + is_fullframe_operation = false; } }; @@ -341,6 +352,13 @@ class NodeOperation { NodeOperationOutput *getOutputSocket(unsigned int index = 0); NodeOperationInput *getInputSocket(unsigned int index); + NodeOperation *get_input_operation(int index) + { + /* TODO: Rename protected getInputOperation to get_input_operation and make it public replacing + * this method. */ + return getInputOperation(index); + } + /** * \brief determine the resolution of this node * \note this method will not set the resolution, this is the responsibility of the caller @@ -537,6 +555,33 @@ class NodeOperation { return std::unique_ptr<MetaData>(); } + /* -------------------------------------------------------------------- */ + /** \name Full Frame Methods + * \{ */ + + void render(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs_bufs, + ExecutionSystem &exec_system); + + /** + * Executes operation updating output memory buffer. Single-threaded calls. + */ + virtual void update_memory_buffer(MemoryBuffer *UNUSED(output), + const rcti &UNUSED(output_area), + Span<MemoryBuffer *> UNUSED(inputs), + ExecutionSystem &UNUSED(exec_system)) + { + } + + /** + * Get input operation area being read by this operation on rendering given output area. 
+ */ + virtual void get_area_of_interest(int input_op_idx, const rcti &output_area, rcti &r_input_area); + void get_area_of_interest(NodeOperation *input_op, const rcti &output_area, rcti &r_input_area); + + /** \} */ + protected: NodeOperation(); @@ -616,6 +661,27 @@ class NodeOperation { { } + private: + /* -------------------------------------------------------------------- */ + /** \name Full Frame Methods + * \{ */ + + void render_full_frame(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs_bufs, + ExecutionSystem &exec_system); + + void render_full_frame_fallback(MemoryBuffer *output_buf, + Span<rcti> areas, + Span<MemoryBuffer *> inputs, + ExecutionSystem &exec_system); + void render_tile(MemoryBuffer *output_buf, rcti *tile_rect); + Vector<NodeOperationOutput *> replace_inputs_with_buffers(Span<MemoryBuffer *> inputs_bufs); + void remove_buffers_and_restore_original_inputs( + Span<NodeOperationOutput *> original_inputs_links); + + /** \} */ + /* allow the DebugInfo class to look at internals */ friend class DebugInfo; diff --git a/source/blender/compositor/intern/COM_NodeOperationBuilder.cc b/source/blender/compositor/intern/COM_NodeOperationBuilder.cc index 82eb969b752..c81a5a2bd98 100644 --- a/source/blender/compositor/intern/COM_NodeOperationBuilder.cc +++ b/source/blender/compositor/intern/COM_NodeOperationBuilder.cc @@ -99,8 +99,10 @@ void NodeOperationBuilder::convertToOperations(ExecutionSystem *system) determineResolutions(); - /* surround complex ops with read/write buffer */ - add_complex_operation_buffers(); + if (m_context->get_execution_model() == eExecutionModel::Tiled) { + /* surround complex ops with read/write buffer */ + add_complex_operation_buffers(); + } /* links not available from here on */ /* XXX make m_links a local variable to avoid confusion! 
*/ @@ -111,8 +113,10 @@ void NodeOperationBuilder::convertToOperations(ExecutionSystem *system) /* ensure topological (link-based) order of nodes */ /*sort_operations();*/ /* not needed yet */ - /* create execution groups */ - group_operations(); + if (m_context->get_execution_model() == eExecutionModel::Tiled) { + /* create execution groups */ + group_operations(); + } /* transfer resulting operations to the system */ system->set_operations(m_operations, m_groups); diff --git a/source/blender/compositor/intern/COM_SharedOperationBuffers.cc b/source/blender/compositor/intern/COM_SharedOperationBuffers.cc new file mode 100644 index 00000000000..021e948a727 --- /dev/null +++ b/source/blender/compositor/intern/COM_SharedOperationBuffers.cc @@ -0,0 +1,131 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#include "COM_SharedOperationBuffers.h" +#include "BLI_rect.h" +#include "COM_NodeOperation.h" + +namespace blender::compositor { + +SharedOperationBuffers::SharedOperationBuffers() : buffers_() +{ +} +SharedOperationBuffers::BufferData::BufferData() + : buffer(nullptr), render_areas(), registered_reads(0), received_reads(0) +{ +} + +SharedOperationBuffers::BufferData &SharedOperationBuffers::get_buffer_data(NodeOperation *op) +{ + return buffers_.lookup_or_add_cb(op, []() { return BufferData(); }); +} + +/** + * Whether given operation area to render is already registered. + * TODO: Possibly refactor to "request_area". Current implementation is incomplete: partial + * overlapping, etc. Leading to more rendering than necessary. + */ +bool SharedOperationBuffers::is_area_registered(NodeOperation *op, const rcti &area_to_render) +{ + BufferData &buf_data = get_buffer_data(op); + for (rcti &reg_rect : buf_data.render_areas) { + if (BLI_rcti_inside_rcti(&reg_rect, &area_to_render)) { + return true; + } + } + return false; +} + +/** + * Registers an operation area to render. + */ +void SharedOperationBuffers::register_area(NodeOperation *op, const rcti &area_to_render) +{ + get_buffer_data(op).render_areas.append(area_to_render); +} + +/** + * Whether given operation has any registered reads (other operation registered it depends on given + * operation). + */ +bool SharedOperationBuffers::has_registered_reads(NodeOperation *op) +{ + return get_buffer_data(op).registered_reads > 0; +} + +/** + * Registers an operation read (other operation depends on given operation). + */ +void SharedOperationBuffers::register_read(NodeOperation *read_op) +{ + get_buffer_data(read_op).registered_reads++; +} + +/** + * Get registered areas given operation needs to render. + */ +blender::Span<rcti> SharedOperationBuffers::get_areas_to_render(NodeOperation *op) +{ + return get_buffer_data(op).render_areas.as_span(); +} + +/** + * Whether this operation buffer has already been rendered. 
+ */ +bool SharedOperationBuffers::is_operation_rendered(NodeOperation *op) +{ + return get_buffer_data(op).buffer != nullptr; +} + +/** + * Stores given operation rendered buffer. + */ +void SharedOperationBuffers::set_rendered_buffer(NodeOperation *op, + std::unique_ptr<MemoryBuffer> buffer) +{ + BufferData &buf_data = get_buffer_data(op); + BLI_assert(buf_data.received_reads == 0); + BLI_assert(buf_data.buffer == nullptr); + buf_data.buffer = std::move(buffer); +} + +/** + * Get given operation rendered buffer. + */ +MemoryBuffer *SharedOperationBuffers::get_rendered_buffer(NodeOperation *op) +{ + BLI_assert(is_operation_rendered(op)); + return get_buffer_data(op).buffer.get(); +} + +/** + * Reports an operation has finished reading given operation. If all given operation dependencies + * have finished its buffer will be disposed. + */ +void SharedOperationBuffers::read_finished(NodeOperation *read_op) +{ + BufferData &buf_data = get_buffer_data(read_op); + buf_data.received_reads++; + BLI_assert(buf_data.received_reads > 0 && buf_data.received_reads <= buf_data.registered_reads); + if (buf_data.received_reads == buf_data.registered_reads) { + /* Dispose buffer. */ + buf_data.buffer = nullptr; + } +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_SharedOperationBuffers.h b/source/blender/compositor/intern/COM_SharedOperationBuffers.h new file mode 100644 index 00000000000..9e90a06a0d3 --- /dev/null +++ b/source/blender/compositor/intern/COM_SharedOperationBuffers.h @@ -0,0 +1,71 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#include "BLI_map.hh" +#include "BLI_span.hh" +#include "BLI_vector.hh" +#include "COM_MemoryBuffer.h" +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif +#include <memory> + +namespace blender::compositor { + +/** + * Stores and shares operations rendered buffers including render data. Buffers are + * disposed once all dependent operations have finished reading them. + */ +class SharedOperationBuffers { + private: + typedef struct BufferData { + public: + BufferData(); + std::unique_ptr<MemoryBuffer> buffer; + blender::Vector<rcti> render_areas; + int registered_reads; + int received_reads; + } BufferData; + blender::Map<NodeOperation *, BufferData> buffers_; + + public: + SharedOperationBuffers(); + bool is_area_registered(NodeOperation *op, const rcti &area_to_render); + void register_area(NodeOperation *op, const rcti &area_to_render); + + bool has_registered_reads(NodeOperation *op); + void register_read(NodeOperation *read_op); + + blender::Span<rcti> get_areas_to_render(NodeOperation *op); + bool is_operation_rendered(NodeOperation *op); + void set_rendered_buffer(NodeOperation *op, std::unique_ptr<MemoryBuffer> buffer); + MemoryBuffer *get_rendered_buffer(NodeOperation *op); + + void read_finished(NodeOperation *read_op); + + private: + BufferData &get_buffer_data(NodeOperation *op); + +#ifdef WITH_CXX_GUARDEDALLOC + MEM_CXX_CLASS_ALLOC_FUNCS("COM:SharedOperationBuffers") +#endif +}; + +} // namespace 
blender::compositor diff --git a/source/blender/compositor/intern/COM_TiledExecutionModel.cc b/source/blender/compositor/intern/COM_TiledExecutionModel.cc new file mode 100644 index 00000000000..d025ce53330 --- /dev/null +++ b/source/blender/compositor/intern/COM_TiledExecutionModel.cc @@ -0,0 +1,158 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#include "COM_TiledExecutionModel.h" +#include "COM_Debug.h" +#include "COM_ExecutionGroup.h" +#include "COM_ReadBufferOperation.h" +#include "COM_WorkScheduler.h" + +#include "BLT_translation.h" + +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif + +namespace blender::compositor { + +TiledExecutionModel::TiledExecutionModel(CompositorContext &context, + Span<NodeOperation *> operations, + Span<ExecutionGroup *> groups) + : ExecutionModel(context, operations), groups_(groups) +{ + const bNodeTree *node_tree = context.getbNodeTree(); + node_tree->stats_draw(node_tree->sdh, TIP_("Compositing | Determining resolution")); + + unsigned int resolution[2]; + for (ExecutionGroup *group : groups_) { + resolution[0] = 0; + resolution[1] = 0; + group->determineResolution(resolution); + + if (border_.use_render_border) { + const rctf *render_border = border_.render_border; + group->setRenderBorder( + render_border->xmin, render_border->xmax, render_border->ymin, render_border->ymax); + } + + if (border_.use_viewer_border) { + const rctf *viewer_border = border_.viewer_border; + group->setViewerBorder( + viewer_border->xmin, viewer_border->xmax, viewer_border->ymin, viewer_border->ymax); + } + } +} + +static void update_read_buffer_offset(Span<NodeOperation *> operations) +{ + unsigned int order = 0; + for (NodeOperation *operation : operations) { + if (operation->get_flags().is_read_buffer_operation) { + ReadBufferOperation *readOperation = static_cast<ReadBufferOperation *>(operation); + readOperation->setOffset(order); + order++; + } + } +} + +static void init_write_operations_for_execution(Span<NodeOperation *> operations, + const bNodeTree *bTree) +{ + for (NodeOperation *operation : operations) { + if (operation->get_flags().is_write_buffer_operation) { + operation->setbNodeTree(bTree); + operation->initExecution(); + } + } +} + +static void link_write_buffers(Span<NodeOperation *> operations) +{ + for (NodeOperation *operation : operations) { + if 
(operation->get_flags().is_read_buffer_operation) { + ReadBufferOperation *readOperation = static_cast<ReadBufferOperation *>(operation); + readOperation->updateMemoryBuffer(); + } + } +} +/* Sets the node tree on every operation that is not a write buffer and initializes it. */ +static void init_non_write_operations_for_execution(Span<NodeOperation *> operations, + const bNodeTree *bTree) +{ + for (NodeOperation *operation : operations) { + if (!operation->get_flags().is_write_buffer_operation) { + operation->setbNodeTree(bTree); + operation->initExecution(); + } + } +} +/* Sets the chunk size of every execution group and initializes it. */ +static void init_execution_groups_for_execution(Span<ExecutionGroup *> groups, + const int chunk_size) +{ + for (ExecutionGroup *execution_group : groups) { + execution_group->setChunksize(chunk_size); + execution_group->initExecution(); + } +} +/* Initializes operations and groups, runs output groups by priority, then de-initializes. */ +void TiledExecutionModel::execute(ExecutionSystem &exec_system) +{ + const bNodeTree *editingtree = this->context_.getbNodeTree(); + + editingtree->stats_draw(editingtree->sdh, TIP_("Compositing | Initializing execution")); + + update_read_buffer_offset(operations_); + /* Write buffer operations are initialized and linked before all other operations. */ + init_write_operations_for_execution(operations_, context_.getbNodeTree()); + link_write_buffers(operations_); + init_non_write_operations_for_execution(operations_, context_.getbNodeTree()); + init_execution_groups_for_execution(groups_, context_.getChunksize()); + /* High priority always runs; medium and low are skipped for fast calculation. */ + WorkScheduler::start(context_); + execute_groups(eCompositorPriority::High, exec_system); + if (!context_.isFastCalculation()) { + execute_groups(eCompositorPriority::Medium, exec_system); + execute_groups(eCompositorPriority::Low, exec_system); + } + WorkScheduler::finish(); + WorkScheduler::stop(); + + editingtree->stats_draw(editingtree->sdh, TIP_("Compositing | De-initializing execution")); + + for (NodeOperation *operation : operations_) { + operation->deinitExecution(); + } + + for (ExecutionGroup *execution_group : groups_) { + execution_group->deinitExecution(); + } +} +/* Executes the output groups whose render priority equals `priority`. */ +void TiledExecutionModel::execute_groups(eCompositorPriority priority, + ExecutionSystem &exec_system) +{ + for (ExecutionGroup 
*execution_group : groups_) { + if (execution_group->get_flags().is_output && + execution_group->getRenderPriority() == priority) { + execution_group->execute(&exec_system); + } + } +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_TiledExecutionModel.h b/source/blender/compositor/intern/COM_TiledExecutionModel.h new file mode 100644 index 00000000000..05a795b9f07 --- /dev/null +++ b/source/blender/compositor/intern/COM_TiledExecutionModel.h @@ -0,0 +1,54 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#include "COM_ExecutionModel.h" + +#ifdef WITH_CXX_GUARDEDALLOC +# include "MEM_guardedalloc.h" +#endif + +namespace blender::compositor { + +class ExecutionGroup; + +/** + * Operations are executed from outputs to inputs grouped in execution groups and rendered in + * tiles. 
+ */ +class TiledExecutionModel : public ExecutionModel { + private: + /** Execution groups initialized and executed by this model. */ Span<ExecutionGroup *> groups_; + + public: + /** Determines group resolutions and applies the enabled borders. */ TiledExecutionModel(CompositorContext &context, + Span<NodeOperation *> operations, + Span<ExecutionGroup *> groups); + /** Initializes, executes and de-initializes all operations and execution groups. */ + void execute(ExecutionSystem &exec_system) override; + + private: + /** Executes the output groups that have the given render priority. */ void execute_groups(eCompositorPriority priority, ExecutionSystem &exec_system); + +#ifdef WITH_CXX_GUARDEDALLOC + MEM_CXX_CLASS_ALLOC_FUNCS("COM:TiledExecutionModel") +#endif +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/intern/COM_WorkPackage.h b/source/blender/compositor/intern/COM_WorkPackage.h index 28aa746fdc4..4d503022120 100644 --- a/source/blender/compositor/intern/COM_WorkPackage.h +++ b/source/blender/compositor/intern/COM_WorkPackage.h @@ -22,6 +22,7 @@ #include "BLI_rect.h" +#include <functional> #include <ostream> namespace blender::compositor { @@ -33,6 +34,8 @@ class ExecutionGroup; * \see WorkScheduler */ struct WorkPackage { + eWorkPackageType type; + eWorkPackageState state = eWorkPackageState::NotScheduled; /** @@ -50,6 +53,16 @@ struct WorkPackage { */ rcti rect; + /** + * Custom function to execute when work package type is CustomFunction. + */ + std::function<void()> execute_fn; + + /** + * Called when work execution is finished. 
+ */ + std::function<void()> executed_fn; + #ifdef WITH_CXX_GUARDEDALLOC MEM_CXX_CLASS_ALLOC_FUNCS("COM:WorkPackage") #endif diff --git a/source/blender/compositor/intern/COM_WorkScheduler.cc b/source/blender/compositor/intern/COM_WorkScheduler.cc index d578ac24a4a..157ded943d6 100644 --- a/source/blender/compositor/intern/COM_WorkScheduler.cc +++ b/source/blender/compositor/intern/COM_WorkScheduler.cc @@ -98,6 +98,8 @@ static struct { bool active = false; bool initialized = false; } opencl; + + int num_cpu_threads; } g_work_scheduler; /* -------------------------------------------------------------------- */ @@ -143,7 +145,8 @@ static void opencl_start(CompositorContext &context) static bool opencl_schedule(WorkPackage *package) { - if (package->execution_group->get_flags().open_cl && g_work_scheduler.opencl.active) { + if (package->type == eWorkPackageType::Tile && package->execution_group->get_flags().open_cl && + g_work_scheduler.opencl.active) { BLI_thread_queue_push(g_work_scheduler.opencl.queue, package); return true; } @@ -532,11 +535,12 @@ void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads) opencl_initialize(use_opencl); } + g_work_scheduler.num_cpu_threads = num_cpu_threads; switch (COM_threading_model()) { case ThreadingModel::SingleThreaded: + g_work_scheduler.num_cpu_threads = 1; /* Nothing to do. 
*/ break; - case ThreadingModel::Queue: threading_model_queue_initialize(num_cpu_threads); break; @@ -568,6 +572,11 @@ void WorkScheduler::deinitialize() } } +/* Returns the CPU thread count stored by initialize() (1 when single threaded). */ int WorkScheduler::get_num_cpu_threads() +{ + return g_work_scheduler.num_cpu_threads; +} + int WorkScheduler::current_thread_id() { if (COM_threading_model() == ThreadingModel::SingleThreaded) { diff --git a/source/blender/compositor/intern/COM_WorkScheduler.h b/source/blender/compositor/intern/COM_WorkScheduler.h index 85b1d7e2ebf..be88859be7c 100644 --- a/source/blender/compositor/intern/COM_WorkScheduler.h +++ b/source/blender/compositor/intern/COM_WorkScheduler.h @@ -87,6 +87,8 @@ struct WorkScheduler { */ static bool has_gpu_devices(); + /** CPU thread count recorded by initialize(). */ static int get_num_cpu_threads(); + static int current_thread_id(); #ifdef WITH_CXX_GUARDEDALLOC |