From 9d7cb5c4a1158266d2f8caa1fc19be2a00fdf101 Mon Sep 17 00:00:00 2001 From: Manuel Castilla Date: Sat, 4 Sep 2021 15:23:28 +0200 Subject: Compositor: Full frame filter nodes Adds full frame implementation to Anti-Aliasing, Defocus, Denoise, Despeckle, Dilate/Erode, Directional Blur, Filter, Inpaint and Vector Blur nodes. The other nodes in "Filter" sub-menu are submitted separately. Part of T88150. Reviewed By: jbakker Differential Revision: https://developer.blender.org/D12219 --- .../blender/compositor/intern/COM_MemoryBuffer.h | 6 + .../COM_ConvertDepthToRadiusOperation.cc | 27 ++ .../operations/COM_ConvertDepthToRadiusOperation.h | 8 +- .../COM_ConvolutionEdgeFilterOperation.cc | 77 +++++ .../COM_ConvolutionEdgeFilterOperation.h | 4 + .../operations/COM_ConvolutionFilterOperation.cc | 58 ++++ .../operations/COM_ConvolutionFilterOperation.h | 13 +- .../compositor/operations/COM_DenoiseOperation.cc | 100 ++++-- .../compositor/operations/COM_DenoiseOperation.h | 15 +- .../operations/COM_DespeckleOperation.cc | 107 +++++++ .../compositor/operations/COM_DespeckleOperation.h | 12 +- .../operations/COM_DilateErodeOperation.cc | 355 ++++++++++++++++++++- .../operations/COM_DilateErodeOperation.h | 40 ++- .../operations/COM_DirectionalBlurOperation.cc | 54 ++++ .../operations/COM_DirectionalBlurOperation.h | 9 +- .../compositor/operations/COM_InpaintOperation.cc | 44 +++ .../compositor/operations/COM_InpaintOperation.h | 7 + .../compositor/operations/COM_SMAAOperation.cc | 355 +++++++++++++++++++-- .../compositor/operations/COM_SMAAOperation.h | 28 +- .../operations/COM_VectorBlurOperation.cc | 46 +++ .../operations/COM_VectorBlurOperation.h | 11 + 21 files changed, 1286 insertions(+), 90 deletions(-) diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.h b/source/blender/compositor/intern/COM_MemoryBuffer.h index f3e15c2a495..f730d53acec 100644 --- a/source/blender/compositor/intern/COM_MemoryBuffer.h +++ b/source/blender/compositor/intern/COM_MemoryBuffer.h 
@@ -373,6 +373,12 @@ class MemoryBuffer { return this->m_buffer; } + float *release_ownership_buffer() + { + owns_data_ = false; + return this->m_buffer; + } + MemoryBuffer *inflate() const; inline void wrap_pixel(int &x, int &y, MemoryBufferExtend extend_x, MemoryBufferExtend extend_y) diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc index a9c58b55d73..405ba03abf3 100644 --- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc +++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc @@ -116,4 +116,31 @@ void ConvertDepthToRadiusOperation::deinitExecution() this->m_inputOperation = nullptr; } +void ConvertDepthToRadiusOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + const float z = *it.in(0); + if (z == 0.0f) { + *it.out = 0.0f; + continue; + } + + const float inv_z = (1.0f / z); + + /* Bug T6656 part 2b, do not re-scale. */ +#if 0 + bcrad = 0.5f * fabs(aperture * (dof_sp * (cam_invfdist - iZ) - 1.0f)); + /* Scale crad back to original maximum and blend: + * `crad->rect[px] = bcrad + wts->rect[px] * (scf * crad->rect[px] - bcrad);` */ +#endif + const float radius = 0.5f * + fabsf(m_aperture * (m_dof_sp * (m_inverseFocalDistance - inv_z) - 1.0f)); + /* Bug T6615, limit minimum radius to 1 pixel, + * not really a solution, but somewhat mitigates the problem. 
*/ + *it.out = CLAMPIS(radius, 0.0f, m_maxRadius); + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h index 1f4e856b128..3d163843d06 100644 --- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h +++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h @@ -19,7 +19,7 @@ #pragma once #include "COM_FastGaussianBlurOperation.h" -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" #include "DNA_object_types.h" namespace blender::compositor { @@ -28,7 +28,7 @@ namespace blender::compositor { * this program converts an input color to an output value. * it assumes we are in sRGB color space. */ -class ConvertDepthToRadiusOperation : public NodeOperation { +class ConvertDepthToRadiusOperation : public MultiThreadedOperation { private: /** * Cached reference to the inputProgram @@ -83,6 +83,10 @@ class ConvertDepthToRadiusOperation : public NodeOperation { { this->m_blurPostOperation = operation; } + + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc index 5ead300a368..9127a871b04 100644 --- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc +++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc @@ -95,4 +95,81 @@ void ConvolutionEdgeFilterOperation::executePixel(float output[4], int x, int y, output[3] = MAX2(output[3], 0.0f); } +void ConvolutionEdgeFilterOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX]; + const int last_x = getWidth() - 1; + const int 
last_y = getHeight() - 1; + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + const int left_offset = (it.x == 0) ? 0 : -image->elem_stride; + const int right_offset = (it.x == last_x) ? 0 : image->elem_stride; + const int down_offset = (it.y == 0) ? 0 : -image->row_stride; + const int up_offset = (it.y == last_y) ? 0 : image->row_stride; + + const float *center_color = it.in(IMAGE_INPUT_INDEX); + float res1[4] = {0}; + float res2[4] = {0}; + + const float *color = center_color + down_offset + left_offset; + madd_v3_v3fl(res1, color, m_filter[0]); + copy_v3_v3(res2, res1); + + color = center_color + down_offset; + madd_v3_v3fl(res1, color, m_filter[1]); + madd_v3_v3fl(res2, color, m_filter[3]); + + color = center_color + down_offset + right_offset; + madd_v3_v3fl(res1, color, m_filter[2]); + madd_v3_v3fl(res2, color, m_filter[6]); + + color = center_color + left_offset; + madd_v3_v3fl(res1, color, m_filter[3]); + madd_v3_v3fl(res2, color, m_filter[1]); + + { + float rgb_filtered[3]; + mul_v3_v3fl(rgb_filtered, center_color, m_filter[4]); + add_v3_v3(res1, rgb_filtered); + add_v3_v3(res2, rgb_filtered); + } + + color = center_color + right_offset; + madd_v3_v3fl(res1, color, m_filter[5]); + madd_v3_v3fl(res2, color, m_filter[7]); + + color = center_color + up_offset + left_offset; + madd_v3_v3fl(res1, color, m_filter[6]); + madd_v3_v3fl(res2, color, m_filter[2]); + + color = center_color + up_offset; + madd_v3_v3fl(res1, color, m_filter[7]); + madd_v3_v3fl(res2, color, m_filter[5]); + + { + color = center_color + up_offset + right_offset; + float rgb_filtered[3]; + mul_v3_v3fl(rgb_filtered, color, m_filter[8]); + add_v3_v3(res1, rgb_filtered); + add_v3_v3(res2, rgb_filtered); + } + + it.out[0] = sqrt(res1[0] * res1[0] + res2[0] * res2[0]); + it.out[1] = sqrt(res1[1] * res1[1] + res2[1] * res2[1]); + it.out[2] = sqrt(res1[2] * res1[2] + res2[2] * res2[2]); + + const float factor = *it.in(FACTOR_INPUT_INDEX); + const float m_factor 
= 1.0f - factor; + it.out[0] = it.out[0] * factor + center_color[0] * m_factor; + it.out[1] = it.out[1] * factor + center_color[1] * m_factor; + it.out[2] = it.out[2] * factor + center_color[2] * m_factor; + + it.out[3] = center_color[3]; + + /* Make sure we don't return negative color. */ + CLAMP4_MIN(it.out, 0.0f); + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h index 319b424bd4a..bd38e27165a 100644 --- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h +++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h @@ -25,6 +25,10 @@ namespace blender::compositor { class ConvolutionEdgeFilterOperation : public ConvolutionFilterOperation { public: void executePixel(float output[4], int x, int y, void *data) override; + + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc index 72cbbf4283a..11a077229fd 100644 --- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc +++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc @@ -127,4 +127,62 @@ bool ConvolutionFilterOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void ConvolutionFilterOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case IMAGE_INPUT_INDEX: { + const int add_x = (m_filterWidth - 1) / 2 + 1; + const int add_y = (m_filterHeight - 1) / 2 + 1; + r_input_area.xmin = output_area.xmin - add_x; + r_input_area.xmax = output_area.xmax + add_x; + r_input_area.ymin = 
output_area.ymin - add_y; + r_input_area.ymax = output_area.ymax + add_y; + break; + } + case FACTOR_INPUT_INDEX: { + r_input_area = output_area; + break; + } + } +} + +void ConvolutionFilterOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX]; + const int last_x = getWidth() - 1; + const int last_y = getHeight() - 1; + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + const int left_offset = (it.x == 0) ? 0 : -image->elem_stride; + const int right_offset = (it.x == last_x) ? 0 : image->elem_stride; + const int down_offset = (it.y == 0) ? 0 : -image->row_stride; + const int up_offset = (it.y == last_y) ? 0 : image->row_stride; + + const float *center_color = it.in(IMAGE_INPUT_INDEX); + zero_v4(it.out); + madd_v4_v4fl(it.out, center_color + down_offset + left_offset, m_filter[0]); + madd_v4_v4fl(it.out, center_color + down_offset, m_filter[1]); + madd_v4_v4fl(it.out, center_color + down_offset + right_offset, m_filter[2]); + madd_v4_v4fl(it.out, center_color + left_offset, m_filter[3]); + madd_v4_v4fl(it.out, center_color, m_filter[4]); + madd_v4_v4fl(it.out, center_color + right_offset, m_filter[5]); + madd_v4_v4fl(it.out, center_color + up_offset + left_offset, m_filter[6]); + madd_v4_v4fl(it.out, center_color + up_offset, m_filter[7]); + madd_v4_v4fl(it.out, center_color + up_offset + right_offset, m_filter[8]); + + const float factor = *it.in(FACTOR_INPUT_INDEX); + const float m_factor = 1.0f - factor; + it.out[0] = it.out[0] * factor + center_color[0] * m_factor; + it.out[1] = it.out[1] * factor + center_color[1] * m_factor; + it.out[2] = it.out[2] * factor + center_color[2] * m_factor; + it.out[3] = it.out[3] * factor + center_color[3] * m_factor; + + /* Make sure we don't return negative color. 
*/ + CLAMP4_MIN(it.out, 0.0f); + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h index 16dee502929..7e12c7faa5c 100644 --- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h +++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h @@ -18,11 +18,15 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" namespace blender::compositor { -class ConvolutionFilterOperation : public NodeOperation { +class ConvolutionFilterOperation : public MultiThreadedOperation { + protected: + static constexpr int IMAGE_INPUT_INDEX = 0; + static constexpr int FACTOR_INPUT_INDEX = 1; + private: int m_filterWidth; int m_filterHeight; @@ -43,6 +47,11 @@ class ConvolutionFilterOperation : public NodeOperation { void initExecution() override; void deinitExecution() override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final; + virtual void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.cc b/source/blender/compositor/operations/COM_DenoiseOperation.cc index ec11ad4d69a..e7f2d5a740a 100644 --- a/source/blender/compositor/operations/COM_DenoiseOperation.cc +++ b/source/blender/compositor/operations/COM_DenoiseOperation.cc @@ -35,6 +35,8 @@ DenoiseOperation::DenoiseOperation() this->addInputSocket(DataType::Color); this->addOutputSocket(DataType::Color); this->m_settings = nullptr; + flags.is_fullframe_operation = true; + output_rendered_ = false; } void DenoiseOperation::initExecution() { @@ -63,8 +65,7 @@ MemoryBuffer *DenoiseOperation::createMemoryBuffer(rcti *rect2) rect.xmax = getWidth(); rect.ymax = getHeight(); MemoryBuffer *result = new MemoryBuffer(DataType::Color, 
rect); - float *data = result->getBuffer(); - this->generateDenoise(data, tileColor, tileNormal, tileAlbedo, this->m_settings); + this->generateDenoise(result, tileColor, tileNormal, tileAlbedo, this->m_settings); return result; } @@ -84,23 +85,33 @@ bool DenoiseOperation::determineDependingAreaOfInterest(rcti * /*input*/, return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } -void DenoiseOperation::generateDenoise(float *data, - MemoryBuffer *inputTileColor, - MemoryBuffer *inputTileNormal, - MemoryBuffer *inputTileAlbedo, +void DenoiseOperation::generateDenoise(MemoryBuffer *output, + MemoryBuffer *input_color, + MemoryBuffer *input_normal, + MemoryBuffer *input_albedo, NodeDenoise *settings) { - float *inputBufferColor = inputTileColor->getBuffer(); - BLI_assert(inputBufferColor); - if (!inputBufferColor) { + BLI_assert(input_color->getBuffer()); + if (!input_color->getBuffer()) { return; } + #ifdef WITH_OPENIMAGEDENOISE /* Always supported through Accelerate framework BNNS on macOS. */ # ifndef __APPLE__ if (BLI_cpu_support_sse41()) # endif { + /* OpenImageDenoise needs full buffers. */ + MemoryBuffer *buf_color = input_color->is_a_single_elem() ? input_color->inflate() : + input_color; + MemoryBuffer *buf_normal = input_normal && input_normal->is_a_single_elem() ? + input_normal->inflate() : + input_normal; + MemoryBuffer *buf_albedo = input_albedo && input_albedo->is_a_single_elem() ? + input_albedo->inflate() : + input_albedo; + /* Since it's memory intensive, it's better to run only one instance of OIDN at a time. * OpenImageDenoise is multithreaded internally and should use all available cores nonetheless. 
*/ @@ -111,35 +122,35 @@ void DenoiseOperation::generateDenoise(float *data, oidn::FilterRef filter = device.newFilter("RT"); filter.setImage("color", - inputBufferColor, + buf_color->getBuffer(), oidn::Format::Float3, - inputTileColor->getWidth(), - inputTileColor->getHeight(), + buf_color->getWidth(), + buf_color->getHeight(), 0, sizeof(float[4])); - if (inputTileNormal && inputTileNormal->getBuffer()) { + if (buf_normal && buf_normal->getBuffer()) { filter.setImage("normal", - inputTileNormal->getBuffer(), + buf_normal->getBuffer(), oidn::Format::Float3, - inputTileNormal->getWidth(), - inputTileNormal->getHeight(), + buf_normal->getWidth(), + buf_normal->getHeight(), 0, sizeof(float[3])); } - if (inputTileAlbedo && inputTileAlbedo->getBuffer()) { + if (buf_albedo && buf_albedo->getBuffer()) { filter.setImage("albedo", - inputTileAlbedo->getBuffer(), + buf_albedo->getBuffer(), oidn::Format::Float3, - inputTileAlbedo->getWidth(), - inputTileAlbedo->getHeight(), + buf_albedo->getWidth(), + buf_albedo->getHeight(), 0, sizeof(float[4])); } filter.setImage("output", - data, + output->getBuffer(), oidn::Format::Float3, - inputTileColor->getWidth(), - inputTileColor->getHeight(), + buf_color->getWidth(), + buf_color->getHeight(), 0, sizeof(float[4])); @@ -153,19 +164,46 @@ void DenoiseOperation::generateDenoise(float *data, filter.execute(); BLI_mutex_unlock(&oidn_lock); - /* copy the alpha channel, OpenImageDenoise currently only supports RGB */ - size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight(); - for (size_t i = 0; i < numPixels; i++) { - data[i * 4 + 3] = inputBufferColor[i * 4 + 3]; + /* Copy the alpha channel, OpenImageDenoise currently only supports RGB. */ + output->copy_from(input_color, input_color->get_rect(), 3, COM_DATA_TYPE_VALUE_CHANNELS, 3); + + /* Delete inflated buffers. 
*/ + if (input_color->is_a_single_elem()) { + delete buf_color; + } + if (input_normal && input_normal->is_a_single_elem()) { + delete buf_normal; } + if (input_albedo && input_albedo->is_a_single_elem()) { + delete buf_albedo; + } + return; } #endif /* If built without OIDN or running on an unsupported CPU, just pass through. */ - UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings); - ::memcpy(data, - inputBufferColor, - sizeof(float[4]) * inputTileColor->getWidth() * inputTileColor->getHeight()); + UNUSED_VARS(input_albedo, input_normal, settings); + output->copy_from(input_color, input_color->get_rect()); +} + +void DenoiseOperation::get_area_of_interest(const int UNUSED(input_idx), + const rcti &UNUSED(output_area), + rcti &r_input_area) +{ + r_input_area.xmin = 0; + r_input_area.xmax = this->getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = this->getHeight(); +} + +void DenoiseOperation::update_memory_buffer(MemoryBuffer *output, + const rcti &UNUSED(area), + Span inputs) +{ + if (!output_rendered_) { + this->generateDenoise(output, inputs[0], inputs[1], inputs[2], m_settings); + output_rendered_ = true; + } } } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.h b/source/blender/compositor/operations/COM_DenoiseOperation.h index a9298c17e92..48209c3eacf 100644 --- a/source/blender/compositor/operations/COM_DenoiseOperation.h +++ b/source/blender/compositor/operations/COM_DenoiseOperation.h @@ -37,6 +37,8 @@ class DenoiseOperation : public SingleThreadedOperation { */ NodeDenoise *m_settings; + bool output_rendered_; + public: DenoiseOperation(); /** @@ -57,11 +59,16 @@ class DenoiseOperation : public SingleThreadedOperation { ReadBufferOperation *readOperation, rcti *output) override; + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span inputs) override; + protected: - void 
generateDenoise(float *data, - MemoryBuffer *inputTileColor, - MemoryBuffer *inputTileNormal, - MemoryBuffer *inputTileAlbedo, + void generateDenoise(MemoryBuffer *output, + MemoryBuffer *input_color, + MemoryBuffer *input_normal, + MemoryBuffer *input_albedo, NodeDenoise *settings); MemoryBuffer *createMemoryBuffer(rcti *rect) override; diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.cc b/source/blender/compositor/operations/COM_DespeckleOperation.cc index fc8778c7d2e..19bd7b2af6f 100644 --- a/source/blender/compositor/operations/COM_DespeckleOperation.cc +++ b/source/blender/compositor/operations/COM_DespeckleOperation.cc @@ -127,6 +127,11 @@ void DespeckleOperation::executePixel(float output[4], int x, int y, void * /*da else { copy_v4_v4(output, color_org); } + +#undef TOT_DIV_ONE +#undef TOT_DIV_CNR +#undef WTOT +#undef COLOR_ADD } bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input, @@ -144,4 +149,106 @@ bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input, return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void DespeckleOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case IMAGE_INPUT_INDEX: { + const int add_x = 2; //(this->m_filterWidth - 1) / 2 + 1; + const int add_y = 2; //(this->m_filterHeight - 1) / 2 + 1; + r_input_area.xmin = output_area.xmin - add_x; + r_input_area.xmax = output_area.xmax + add_x; + r_input_area.ymin = output_area.ymin - add_y; + r_input_area.ymax = output_area.ymax + add_y; + break; + } + case FACTOR_INPUT_INDEX: { + r_input_area = output_area; + break; + } + } +} + +void DespeckleOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX]; + const int last_x = getWidth() - 1; + const int last_y = getHeight() - 1; + for (BuffersIterator it = 
output->iterate_with(inputs, area); !it.is_end(); ++it) { + const int x1 = MAX2(it.x - 1, 0); + const int x2 = it.x; + const int x3 = MIN2(it.x + 1, last_x); + const int y1 = MAX2(it.y - 1, 0); + const int y2 = it.y; + const int y3 = MIN2(it.y + 1, last_y); + + float w = 0.0f; + const float *color_org = it.in(IMAGE_INPUT_INDEX); + float color_mid[4]; + float color_mid_ok[4]; + const float *in1 = nullptr; + +#define TOT_DIV_ONE 1.0f +#define TOT_DIV_CNR (float)M_SQRT1_2 + +#define WTOT (TOT_DIV_ONE * 4 + TOT_DIV_CNR * 4) + +#define COLOR_ADD(fac) \ + { \ + madd_v4_v4fl(color_mid, in1, fac); \ + if (color_diff(in1, color_org, m_threshold)) { \ + w += fac; \ + madd_v4_v4fl(color_mid_ok, in1, fac); \ + } \ + } + + zero_v4(color_mid); + zero_v4(color_mid_ok); + + in1 = image->get_elem(x1, y1); + COLOR_ADD(TOT_DIV_CNR) + in1 = image->get_elem(x2, y1); + COLOR_ADD(TOT_DIV_ONE) + in1 = image->get_elem(x3, y1); + COLOR_ADD(TOT_DIV_CNR) + in1 = image->get_elem(x1, y2); + COLOR_ADD(TOT_DIV_ONE) + +#if 0 + const float* in2 = image->get_elem(x2, y2); + madd_v4_v4fl(color_mid, in2, this->m_filter[4]); +#endif + + in1 = image->get_elem(x3, y2); + COLOR_ADD(TOT_DIV_ONE) + in1 = image->get_elem(x1, y3); + COLOR_ADD(TOT_DIV_CNR) + in1 = image->get_elem(x2, y3); + COLOR_ADD(TOT_DIV_ONE) + in1 = image->get_elem(x3, y3); + COLOR_ADD(TOT_DIV_CNR) + + mul_v4_fl(color_mid, 1.0f / (4.0f + (4.0f * (float)M_SQRT1_2))); + // mul_v4_fl(color_mid, 1.0f / w); + + if ((w != 0.0f) && ((w / WTOT) > (m_threshold_neighbor)) && + color_diff(color_mid, color_org, m_threshold)) { + const float factor = *it.in(FACTOR_INPUT_INDEX); + mul_v4_fl(color_mid_ok, 1.0f / w); + interp_v4_v4v4(it.out, color_org, color_mid_ok, factor); + } + else { + copy_v4_v4(it.out, color_org); + } + +#undef TOT_DIV_ONE +#undef TOT_DIV_CNR +#undef WTOT +#undef COLOR_ADD + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.h 
b/source/blender/compositor/operations/COM_DespeckleOperation.h index e8d3461d2ec..70d6c2227f4 100644 --- a/source/blender/compositor/operations/COM_DespeckleOperation.h +++ b/source/blender/compositor/operations/COM_DespeckleOperation.h @@ -18,12 +18,15 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" namespace blender::compositor { -class DespeckleOperation : public NodeOperation { +class DespeckleOperation : public MultiThreadedOperation { private: + constexpr static int IMAGE_INPUT_INDEX = 0; + constexpr static int FACTOR_INPUT_INDEX = 1; + float m_threshold; float m_threshold_neighbor; @@ -52,6 +55,11 @@ class DespeckleOperation : public NodeOperation { void initExecution() override; void deinitExecution() override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.cc b/source/blender/compositor/operations/COM_DilateErodeOperation.cc index c459d09f02c..28b40021cd9 100644 --- a/source/blender/compositor/operations/COM_DilateErodeOperation.cc +++ b/source/blender/compositor/operations/COM_DilateErodeOperation.cc @@ -35,9 +35,9 @@ DilateErodeThresholdOperation::DilateErodeThresholdOperation() this->m__switch = 0.5f; this->m_distance = 0.0f; } -void DilateErodeThresholdOperation::initExecution() + +void DilateErodeThresholdOperation::init_data() { - this->m_inputProgram = this->getInputSocketReader(0); if (this->m_distance < 0.0f) { this->m_scope = -this->m_distance + this->m_inset; } @@ -54,6 +54,11 @@ void DilateErodeThresholdOperation::initExecution() } } +void DilateErodeThresholdOperation::initExecution() +{ + this->m_inputProgram = this->getInputSocketReader(0); +} + void *DilateErodeThresholdOperation::initializeTileData(rcti * /*rect*/) { void *buffer = 
this->m_inputProgram->initializeTileData(nullptr); @@ -160,6 +165,112 @@ bool DilateErodeThresholdOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void DilateErodeThresholdOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + BLI_assert(input_idx == 0); + UNUSED_VARS_NDEBUG(input_idx); + r_input_area.xmin = output_area.xmin - m_scope; + r_input_area.xmax = output_area.xmax + m_scope; + r_input_area.ymin = output_area.ymin - m_scope; + r_input_area.ymax = output_area.ymax + m_scope; +} + +struct DilateErodeThresholdOperation::PixelData { + int x; + int y; + int xmin; + int xmax; + int ymin; + int ymax; + const float *elem; + float distance; + int elem_stride; + int row_stride; + /** Switch. */ + float sw; +}; + +template typename TCompare> +static float get_min_distance(DilateErodeThresholdOperation::PixelData &p) +{ + /* TODO(manzanilla): bad performance, generate a table with relative offsets on operation + * initialization to loop from less to greater distance and break as soon as #compare is + * true. 
*/ + const TCompare compare; + float min_dist = p.distance; + const float *row = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride + + ((intptr_t)p.xmin - p.x) * p.elem_stride; + for (int yi = p.ymin; yi < p.ymax; yi++) { + const float dy = yi - p.y; + const float dist_y = dy * dy; + const float *elem = row; + for (int xi = p.xmin; xi < p.xmax; xi++) { + if (compare(*elem, p.sw)) { + const float dx = xi - p.x; + const float dist = dx * dx + dist_y; + min_dist = MIN2(min_dist, dist); + } + elem += p.elem_stride; + } + row += p.row_stride; + } + return min_dist; +} + +void DilateErodeThresholdOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *input = inputs[0]; + const rcti &input_rect = input->get_rect(); + const float rd = m_scope * m_scope; + const float inset = m_inset; + + PixelData p; + p.sw = m__switch; + p.distance = rd * 2; + p.elem_stride = input->elem_stride; + p.row_stride = input->row_stride; + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + p.x = it.x; + p.y = it.y; + p.xmin = MAX2(p.x - m_scope, input_rect.xmin); + p.ymin = MAX2(p.y - m_scope, input_rect.ymin); + p.xmax = MIN2(p.x + m_scope, input_rect.xmax); + p.ymax = MIN2(p.y + m_scope, input_rect.ymax); + p.elem = it.in(0); + + float pixel_value; + if (*p.elem > p.sw) { + pixel_value = -sqrtf(get_min_distance(p)); + } + else { + pixel_value = sqrtf(get_min_distance(p)); + } + + if (m_distance > 0.0f) { + const float delta = m_distance - pixel_value; + if (delta >= 0.0f) { + *it.out = delta >= inset ? 1.0f : delta / inset; + } + else { + *it.out = 0.0f; + } + } + else { + const float delta = -m_distance + pixel_value; + if (delta < 0.0f) { + *it.out = delta < -inset ? 1.0f : (-delta) / inset; + } + else { + *it.out = 0.0f; + } + } + } +} + /* Dilate Distance. 
*/ DilateDistanceOperation::DilateDistanceOperation() { @@ -170,15 +281,20 @@ DilateDistanceOperation::DilateDistanceOperation() flags.complex = true; flags.open_cl = true; } -void DilateDistanceOperation::initExecution() + +void DilateDistanceOperation::init_data() { - this->m_inputProgram = this->getInputSocketReader(0); this->m_scope = this->m_distance; if (this->m_scope < 3) { this->m_scope = 3; } } +void DilateDistanceOperation::initExecution() +{ + this->m_inputProgram = this->getInputSocketReader(0); +} + void *DilateDistanceOperation::initializeTileData(rcti * /*rect*/) { void *buffer = this->m_inputProgram->initializeTileData(nullptr); @@ -258,6 +374,92 @@ void DilateDistanceOperation::executeOpenCL(OpenCLDevice *device, device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this); } +void DilateDistanceOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + BLI_assert(input_idx == 0); + UNUSED_VARS_NDEBUG(input_idx); + r_input_area.xmin = output_area.xmin - m_scope; + r_input_area.xmax = output_area.xmax + m_scope; + r_input_area.ymin = output_area.ymin - m_scope; + r_input_area.ymax = output_area.ymax + m_scope; +} + +struct DilateDistanceOperation::PixelData { + int x; + int y; + int xmin; + int xmax; + int ymin; + int ymax; + const float *elem; + float min_distance; + int scope; + int elem_stride; + int row_stride; + const rcti &input_rect; + + PixelData(MemoryBuffer *input, const int distance, const int scope) + : min_distance(distance * distance), + scope(scope), + elem_stride(input->elem_stride), + row_stride(input->row_stride), + input_rect(input->get_rect()) + { + } + + void update(BuffersIterator &it) + { + x = it.x; + y = it.y; + xmin = MAX2(x - scope, input_rect.xmin); + ymin = MAX2(y - scope, input_rect.ymin); + xmax = MIN2(x + scope, input_rect.xmax); + ymax = MIN2(y + scope, input_rect.ymax); + elem = it.in(0); + } +}; + +template typename TCompare> +static float 
get_distance_value(DilateDistanceOperation::PixelData &p, const float start_value) +{ + /* TODO(manzanilla): bad performance, only loop elements within minimum distance removing + * coordinates and conditional if `dist <= min_dist`. May need to generate a table of offsets. */ + const TCompare compare; + const float min_dist = p.min_distance; + float value = start_value; + const float *row = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride + + ((intptr_t)p.xmin - p.x) * p.elem_stride; + for (int yi = p.ymin; yi < p.ymax; yi++) { + const float dy = yi - p.y; + const float dist_y = dy * dy; + const float *elem = row; + for (int xi = p.xmin; xi < p.xmax; xi++) { + const float dx = xi - p.x; + const float dist = dx * dx + dist_y; + if (dist <= min_dist) { + value = compare(*elem, value) ? *elem : value; + } + elem += p.elem_stride; + } + row += p.row_stride; + } + + return value; +} + +void DilateDistanceOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + PixelData p(inputs[0], m_distance, m_scope); + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + p.update(it); + *it.out = get_distance_value(p, 0.0f); + } +} + /* Erode Distance */ ErodeDistanceOperation::ErodeDistanceOperation() : DilateDistanceOperation() { @@ -318,6 +520,17 @@ void ErodeDistanceOperation::executeOpenCL(OpenCLDevice *device, device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this); } +void ErodeDistanceOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + PixelData p(inputs[0], m_distance, m_scope); + for (BuffersIterator it = output->iterate_with(inputs, area); !it.is_end(); ++it) { + p.update(it); + *it.out = get_distance_value(p, 1.0f); + } +} + /* Dilate step */ DilateStepOperation::DilateStepOperation() { @@ -475,6 +688,126 @@ bool DilateStepOperation::determineDependingAreaOfInterest(rcti *input, return 
NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void DilateStepOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + BLI_assert(input_idx == 0); + UNUSED_VARS_NDEBUG(input_idx); + r_input_area.xmin = output_area.xmin - m_iterations; + r_input_area.xmax = output_area.xmax + m_iterations; + r_input_area.ymin = output_area.ymin - m_iterations; + r_input_area.ymax = output_area.ymax + m_iterations; +} + +template +static void step_update_memory_buffer(MemoryBuffer *output, + const MemoryBuffer *input, + const rcti &area, + const int num_iterations, + const float compare_min_value) +{ + TCompareSelector selector; + + const int width = output->getWidth(); + const int height = output->getHeight(); + + const int half_window = num_iterations; + const int window = half_window * 2 + 1; + + const int xmin = MAX2(0, area.xmin - half_window); + const int ymin = MAX2(0, area.ymin - half_window); + const int xmax = MIN2(width, area.xmax + half_window); + const int ymax = MIN2(height, area.ymax + half_window); + + const int bwidth = area.xmax - area.xmin; + const int bheight = area.ymax - area.ymin; + + /* NOTE: #result has area width, but new height. + * We have to calculate the additional rows in the first pass, + * to have valid data available for the second pass. */ + rcti result_area; + BLI_rcti_init(&result_area, area.xmin, area.xmax, ymin, ymax); + MemoryBuffer result(DataType::Value, result_area); + + /* #temp holds maxima for every step in the algorithm, #buf holds a + * single row or column of input values, padded with #limit values to + * simplify the logic. */ + float *temp = (float *)MEM_mallocN(sizeof(float) * (2 * window - 1), "dilate erode temp"); + float *buf = (float *)MEM_mallocN(sizeof(float) * (MAX2(bwidth, bheight) + 5 * half_window), + "dilate erode buf"); + + /* The following is based on the van Herk/Gil-Werman algorithm for morphology operations. 
*/ + /* First pass, horizontal dilate/erode. */ + for (int y = ymin; y < ymax; y++) { + for (int x = 0; x < bwidth + 5 * half_window; x++) { + buf[x] = compare_min_value; + } + for (int x = xmin; x < xmax; x++) { + buf[x - area.xmin + window - 1] = input->get_value(x, y, 0); + } + + for (int i = 0; i < (bwidth + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; + + temp[window - 1] = buf[start]; + for (int x = 1; x < window; x++) { + temp[window - 1 - x] = selector(temp[window - x], buf[start - x]); + temp[window - 1 + x] = selector(temp[window + x - 2], buf[start + x]); + } + + start = half_window + (i - 1) * window + 1; + for (int x = -MIN2(0, start); x < window - MAX2(0, start + window - bwidth); x++) { + result.get_value(start + x + area.xmin, y, 0) = selector(temp[x], temp[x + window - 1]); + } + } + } + + /* Second pass, vertical dilate/erode. */ + for (int x = 0; x < bwidth; x++) { + for (int y = 0; y < bheight + 5 * half_window; y++) { + buf[y] = compare_min_value; + } + for (int y = ymin; y < ymax; y++) { + buf[y - area.ymin + window - 1] = result.get_value(x + area.xmin, y, 0); + } + + for (int i = 0; i < (bheight + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; + + temp[window - 1] = buf[start]; + for (int y = 1; y < window; y++) { + temp[window - 1 - y] = selector(temp[window - y], buf[start - y]); + temp[window - 1 + y] = selector(temp[window + y - 2], buf[start + y]); + } + + start = half_window + (i - 1) * window + 1; + for (int y = -MIN2(0, start); y < window - MAX2(0, start + window - bheight); y++) { + result.get_value(x, y + start + area.ymin, 0) = selector(temp[y], temp[y + window - 1]); + } + } + } + + MEM_freeN(temp); + MEM_freeN(buf); + + output->copy_from(&result, area); +} + +struct Max2Selector { + float operator()(float f1, float f2) const + { + return MAX2(f1, f2); + } +}; + +void DilateStepOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + 
step_update_memory_buffer(output, inputs[0], area, m_iterations, -FLT_MAX); +} + /* Erode step */ ErodeStepOperation::ErodeStepOperation() : DilateStepOperation() { @@ -571,4 +904,18 @@ void *ErodeStepOperation::initializeTileData(rcti *rect) return result; } +struct Min2Selector { + float operator()(float f1, float f2) const + { + return MIN2(f1, f2); + } +}; + +void ErodeStepOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + step_update_memory_buffer(output, inputs[0], area, m_iterations, FLT_MAX); +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.h b/source/blender/compositor/operations/COM_DilateErodeOperation.h index a489e293e8e..9c32a5ac1fd 100644 --- a/source/blender/compositor/operations/COM_DilateErodeOperation.h +++ b/source/blender/compositor/operations/COM_DilateErodeOperation.h @@ -18,11 +18,14 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" namespace blender::compositor { -class DilateErodeThresholdOperation : public NodeOperation { +class DilateErodeThresholdOperation : public MultiThreadedOperation { + public: + struct PixelData; + private: /** * Cached reference to the inputProgram @@ -47,6 +50,7 @@ class DilateErodeThresholdOperation : public NodeOperation { */ void executePixel(float output[4], int x, int y, void *data) override; + void init_data() override; /** * Initialize the execution */ @@ -74,10 +78,17 @@ class DilateErodeThresholdOperation : public NodeOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; -class DilateDistanceOperation : public NodeOperation { - private: +class DilateDistanceOperation : public 
MultiThreadedOperation { + public: + struct PixelData; + protected: /** * Cached reference to the inputProgram @@ -94,6 +105,7 @@ class DilateDistanceOperation : public NodeOperation { */ void executePixel(float output[4], int x, int y, void *data) override; + void init_data() override; /** * Initialize the execution */ @@ -119,7 +131,13 @@ class DilateDistanceOperation : public NodeOperation { MemoryBuffer **inputMemoryBuffers, std::list *clMemToCleanUp, std::list *clKernelsToCleanUp) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final; + virtual void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; + class ErodeDistanceOperation : public DilateDistanceOperation { public: ErodeDistanceOperation(); @@ -135,9 +153,13 @@ class ErodeDistanceOperation : public DilateDistanceOperation { MemoryBuffer **inputMemoryBuffers, std::list *clMemToCleanUp, std::list *clKernelsToCleanUp) override; + + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; -class DilateStepOperation : public NodeOperation { +class DilateStepOperation : public MultiThreadedOperation { protected: /** * Cached reference to the inputProgram @@ -174,6 +196,11 @@ class DilateStepOperation : public NodeOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final; + virtual void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; class ErodeStepOperation : public DilateStepOperation { @@ -181,6 +208,9 @@ class ErodeStepOperation : public DilateStepOperation { ErodeStepOperation(); void *initializeTileData(rcti *rect) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace 
blender::compositor diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc index 97bdc25af3b..102025ed915 100644 --- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc +++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc @@ -146,4 +146,58 @@ bool DirectionalBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/ return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void DirectionalBlurOperation::get_area_of_interest(const int input_idx, + const rcti &UNUSED(output_area), + rcti &r_input_area) +{ + BLI_assert(input_idx == 0); + UNUSED_VARS_NDEBUG(input_idx); + r_input_area.xmin = 0; + r_input_area.xmax = this->getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = this->getHeight(); +} + +void DirectionalBlurOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *input = inputs[0]; + const int iterations = pow(2.0f, this->m_data->iter); + for (BuffersIterator it = output->iterate_with({}, area); !it.is_end(); ++it) { + const int x = it.x; + const int y = it.y; + float color_accum[4]; + input->read_elem_bilinear(x, y, color_accum); + + /* Blur pixel. */ + /* TODO(manzanilla): Many values used on iterations can be calculated beforehand. Create a + * table on operation initialization. 
*/ + float ltx = this->m_tx; + float lty = this->m_ty; + float lsc = this->m_sc; + float lrot = this->m_rot; + for (int i = 0; i < iterations; i++) { + const float cs = cosf(lrot), ss = sinf(lrot); + const float isc = 1.0f / (1.0f + lsc); + + const float v = isc * (y - this->m_center_y_pix) + lty; + const float u = isc * (x - this->m_center_x_pix) + ltx; + + float color[4]; + input->read_elem_bilinear( + cs * u + ss * v + this->m_center_x_pix, cs * v - ss * u + this->m_center_y_pix, color); + add_v4_v4(color_accum, color); + + /* Double transformations. */ + ltx += this->m_tx; + lty += this->m_ty; + lrot += this->m_rot; + lsc += this->m_sc; + } + + mul_v4_v4fl(it.out, color_accum, 1.0f / (iterations + 1)); + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h index 5555520462b..9a982bf6481 100644 --- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h +++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h @@ -18,12 +18,12 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" #include "COM_QualityStepHelper.h" namespace blender::compositor { -class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper { +class DirectionalBlurOperation : public MultiThreadedOperation, public QualityStepHelper { private: SocketReader *m_inputProgram; NodeDBlurData *m_data; @@ -65,6 +65,11 @@ class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper MemoryBuffer **inputMemoryBuffers, std::list *clMemToCleanUp, std::list *clKernelsToCleanUp) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git 
a/source/blender/compositor/operations/COM_InpaintOperation.cc b/source/blender/compositor/operations/COM_InpaintOperation.cc index bfcd504177f..5e76c41752c 100644 --- a/source/blender/compositor/operations/COM_InpaintOperation.cc +++ b/source/blender/compositor/operations/COM_InpaintOperation.cc @@ -39,6 +39,7 @@ InpaintSimpleOperation::InpaintSimpleOperation() this->m_manhattan_distance = nullptr; this->m_cached_buffer = nullptr; this->m_cached_buffer_ready = false; + flags.is_fullframe_operation = true; } void InpaintSimpleOperation::initExecution() { @@ -286,4 +287,47 @@ bool InpaintSimpleOperation::determineDependingAreaOfInterest(rcti * /*input*/, return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void InpaintSimpleOperation::get_area_of_interest(const int input_idx, + const rcti &UNUSED(output_area), + rcti &r_input_area) +{ + BLI_assert(input_idx == 0); + UNUSED_VARS_NDEBUG(input_idx); + r_input_area.xmin = 0; + r_input_area.xmax = this->getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = this->getHeight(); +} + +void InpaintSimpleOperation::update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. 
*/ + MemoryBuffer *input = inputs[0]; + if (!m_cached_buffer_ready) { + if (input->is_a_single_elem()) { + MemoryBuffer *tmp = input->inflate(); + m_cached_buffer = tmp->release_ownership_buffer(); + delete tmp; + } + else { + m_cached_buffer = (float *)MEM_dupallocN(input->getBuffer()); + } + + this->calc_manhattan_distance(); + + int curr = 0; + int x, y; + while (this->next_pixel(x, y, curr, this->m_iterations)) { + this->pix_step(x, y); + } + m_cached_buffer_ready = true; + } + + const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType()); + MemoryBuffer buf(m_cached_buffer, num_channels, input->getWidth(), input->getHeight()); + output->copy_from(&buf, area); +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_InpaintOperation.h b/source/blender/compositor/operations/COM_InpaintOperation.h index e3d27bf7704..e11610bd263 100644 --- a/source/blender/compositor/operations/COM_InpaintOperation.h +++ b/source/blender/compositor/operations/COM_InpaintOperation.h @@ -66,6 +66,13 @@ class InpaintSimpleOperation : public NodeOperation { ReadBufferOperation *readOperation, rcti *output) override; + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) override; + void update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span inputs) override; + private: void calc_manhattan_distance(); void clamp_xy(int &x, int &y); diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc index b078d85372d..4153b9c8523 100644 --- a/source/blender/compositor/operations/COM_SMAAOperation.cc +++ b/source/blender/compositor/operations/COM_SMAAOperation.cc @@ -61,6 +61,8 @@ namespace blender::compositor { /*-----------------------------------------------------------------------------*/ /* Internal Functions to Sample Pixel Color from Image */ +/* TODO(manzanilla): to be removed with tiled 
implementation. Replace it with + * #buffer->read_elem_checked. */ static inline void sample(SocketReader *reader, int x, int y, float color[4]) { if (x < 0 || x >= reader->getWidth() || y < 0 || y >= reader->getHeight()) { @@ -71,8 +73,13 @@ static inline void sample(SocketReader *reader, int x, int y, float color[4]) reader->read(color, x, y, nullptr); } -static void sample_bilinear_vertical( - SocketReader *reader, int x, int y, float yoffset, float color[4]) +static inline void sample(MemoryBuffer *reader, int x, int y, float color[4]) +{ + reader->read_elem_checked(x, y, color); +} + +template +static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4]) { float iy = floorf(yoffset); float fy = yoffset - iy; @@ -89,8 +96,8 @@ static void sample_bilinear_vertical( color[3] = interpf(color01[3], color00[3], fy); } -static void sample_bilinear_horizontal( - SocketReader *reader, int x, int y, float xoffset, float color[4]) +template +static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4]) { float ix = floorf(xoffset); float fx = xoffset - ix; @@ -162,7 +169,7 @@ static void area_diag(int d1, int d2, int e1, int e2, float weights[2]) SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation() { this->addInputSocket(DataType::Color); /* image */ - this->addInputSocket(DataType::Value); /* depth, material ID, etc. */ + this->addInputSocket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. 
*/ this->addOutputSocket(DataType::Color); this->flags.complex = true; this->m_imageReader = nullptr; @@ -207,6 +214,16 @@ bool SMAAEdgeDetectionOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void SMAAEdgeDetectionOperation::get_area_of_interest(const int UNUSED(input_idx), + const rcti &output_area, + rcti &r_input_area) +{ + r_input_area.xmax = output_area.xmax + 1; + r_input_area.xmin = output_area.xmin - 2; + r_input_area.ymax = output_area.ymax + 1; + r_input_area.ymin = output_area.ymin - 2; +} + void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, void * /*data*/) { float color[4]; @@ -288,6 +305,94 @@ void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, voi } } +void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) +{ + const MemoryBuffer *image = inputs[0]; + for (BuffersIterator it = output->iterate_with({}, area); !it.is_end(); ++it) { + float color[4]; + const int x = it.x; + const int y = it.y; + + /* Calculate luma deltas: */ + image->read_elem_checked(x, y, color); + const float L = IMB_colormanagement_get_luminance(color); + image->read_elem_checked(x - 1, y, color); + const float Lleft = IMB_colormanagement_get_luminance(color); + image->read_elem_checked(x, y - 1, color); + const float Ltop = IMB_colormanagement_get_luminance(color); + const float Dleft = fabsf(L - Lleft); + const float Dtop = fabsf(L - Ltop); + + /* We do the usual threshold: */ + it.out[0] = (x > 0 && Dleft >= m_threshold) ? 1.0f : 0.0f; + it.out[1] = (y > 0 && Dtop >= m_threshold) ? 
1.0f : 0.0f; + it.out[2] = 0.0f; + it.out[3] = 1.0f; + + /* Then discard if there is no edge: */ + if (is_zero_v2(it.out)) { + continue; + } + + /* Calculate right and bottom deltas: */ + image->read_elem_checked(x + 1, y, color); + const float Lright = IMB_colormanagement_get_luminance(color); + image->read_elem_checked(x, y + 1, color); + const float Lbottom = IMB_colormanagement_get_luminance(color); + const float Dright = fabsf(L - Lright); + const float Dbottom = fabsf(L - Lbottom); + + /* Calculate the maximum delta in the direct neighborhood: */ + float maxDelta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom)); + + /* Calculate luma used for both left and top edges: */ + image->read_elem_checked(x - 1, y - 1, color); + const float Llefttop = IMB_colormanagement_get_luminance(color); + + /* Left edge */ + if (it.out[0] != 0.0f) { + /* Calculate deltas around the left pixel: */ + image->read_elem_checked(x - 2, y, color); + const float Lleftleft = IMB_colormanagement_get_luminance(color); + image->read_elem_checked(x - 1, y + 1, color); + const float Lleftbottom = IMB_colormanagement_get_luminance(color); + const float Dleftleft = fabsf(Lleft - Lleftleft); + const float Dlefttop = fabsf(Lleft - Llefttop); + const float Dleftbottom = fabsf(Lleft - Lleftbottom); + + /* Calculate the final maximum delta: */ + maxDelta = fmaxf(maxDelta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom))); + + /* Local contrast adaptation: */ + if (maxDelta > m_contrast_limit * Dleft) { + it.out[0] = 0.0f; + } + } + + /* Top edge */ + if (it.out[1] != 0.0f) { + /* Calculate top-top delta: */ + image->read_elem_checked(x, y - 2, color); + const float Ltoptop = IMB_colormanagement_get_luminance(color); + image->read_elem_checked(x + 1, y - 1, color); + const float Ltopright = IMB_colormanagement_get_luminance(color); + const float Dtoptop = fabsf(Ltop - Ltoptop); + const float Dtopleft = fabsf(Ltop - Llefttop); + const float Dtopright = fabsf(Ltop - Ltopright); + + /* Calculate the 
final maximum delta: */ + maxDelta = fmaxf(maxDelta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright))); + + /* Local contrast adaptation: */ + if (maxDelta > m_contrast_limit * Dtop) { + it.out[1] = 0.0f; + } + } + } +} + /*-----------------------------------------------------------------------------*/ /* Blending Weight Calculation (Second Pass) */ /*-----------------------------------------------------------------------------*/ @@ -309,6 +414,9 @@ void *SMAABlendingWeightCalculationOperation::initializeTileData(rcti *rect) void SMAABlendingWeightCalculationOperation::initExecution() { this->m_imageReader = this->getInputSocketReader(0); + if (execution_model_ == eExecutionModel::Tiled) { + sample_image_fn_ = [=](int x, int y, float *out) { sample(m_imageReader, x, y, out); }; + } } void SMAABlendingWeightCalculationOperation::setCornerRounding(float rounding) @@ -414,6 +522,113 @@ void SMAABlendingWeightCalculationOperation::executePixel(float output[4], } } +void SMAABlendingWeightCalculationOperation::update_memory_buffer_started( + MemoryBuffer *UNUSED(output), const rcti &UNUSED(out_area), Span inputs) +{ + const MemoryBuffer *image = inputs[0]; + sample_image_fn_ = [=](int x, int y, float *out) { image->read_elem_checked(x, y, out); }; +} + +void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial( + MemoryBuffer *output, const rcti &out_area, Span UNUSED(inputs)) +{ + for (BuffersIterator it = output->iterate_with({}, out_area); !it.is_end(); ++it) { + const int x = it.x; + const int y = it.y; + zero_v4(it.out); + + float edges[4]; + sample_image_fn_(x, y, edges); + + /* Edge at north */ + float c[4]; + if (edges[1] > 0.0f) { + /* Diagonals have both north and west edges, so calculating weights for them */ + /* in one of the boundaries is enough. */ + calculateDiagWeights(x, y, edges, it.out); + + /* We give priority to diagonals, so if we find a diagonal we skip. */ + /* horizontal/vertical processing. 
*/ + if (!is_zero_v2(it.out)) { + continue; + } + + /* Find the distance to the left and the right: */ + int left = searchXLeft(x, y); + int right = searchXRight(x, y); + int d1 = x - left, d2 = right - x; + + /* Fetch the left and right crossing edges: */ + int e1 = 0, e2 = 0; + sample_image_fn_(left, y - 1, c); + if (c[0] > 0.0) { + e1 += 1; + } + sample_image_fn_(left, y, c); + if (c[0] > 0.0) { + e1 += 2; + } + sample_image_fn_(right + 1, y - 1, c); + if (c[0] > 0.0) { + e2 += 1; + } + sample_image_fn_(right + 1, y, c); + if (c[0] > 0.0) { + e2 += 2; + } + + /* Ok, we know how this pattern looks like, now it is time for getting */ + /* the actual area: */ + area(d1, d2, e1, e2, it.out); /* R, G */ + + /* Fix corners: */ + if (m_corner_rounding) { + detectHorizontalCornerPattern(it.out, left, right, y, d1, d2); + } + } + + /* Edge at west */ + if (edges[0] > 0.0f) { + /* Did we already do diagonal search for this west edge from the left neighboring pixel? */ + if (isVerticalSearchUnneeded(x, y)) { + continue; + } + + /* Find the distance to the top and the bottom: */ + int top = searchYUp(x, y); + int bottom = searchYDown(x, y); + int d1 = y - top, d2 = bottom - y; + + /* Fetch the top and bottom crossing edges: */ + int e1 = 0, e2 = 0; + sample_image_fn_(x - 1, top, c); + if (c[1] > 0.0) { + e1 += 1; + } + sample_image_fn_(x, top, c); + if (c[1] > 0.0) { + e1 += 2; + } + sample_image_fn_(x - 1, bottom + 1, c); + if (c[1] > 0.0) { + e2 += 1; + } + sample_image_fn_(x, bottom + 1, c); + if (c[1] > 0.0) { + e2 += 2; + } + + /* Get the area for this direction: */ + area(d1, d2, e1, e2, it.out + 2); /* B, A */ + + /* Fix corners: */ + if (m_corner_rounding) { + detectVerticalCornerPattern(it.out + 2, x, top, bottom, d1, d2); + } + } + } +} + void SMAABlendingWeightCalculationOperation::deinitExecution() { this->m_imageReader = nullptr; @@ -434,6 +649,19 @@ bool SMAABlendingWeightCalculationOperation::determineDependingAreaOfInterest( return 
NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int UNUSED(input_idx), + const rcti &output_area, + rcti &r_input_area) +{ + r_input_area.xmax = output_area.xmax + + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1); + r_input_area.xmin = output_area.xmin - + fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG + 1); + r_input_area.ymax = output_area.ymax + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG); + r_input_area.ymin = output_area.ymin - + fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG); +} + /*-----------------------------------------------------------------------------*/ /* Diagonal Search Functions */ @@ -449,7 +677,7 @@ int SMAABlendingWeightCalculationOperation::searchDiag1(int x, int y, int dir, b while (x != end) { x += dir; y -= dir; - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[1] == 0.0f) { *found = true; break; @@ -472,12 +700,12 @@ int SMAABlendingWeightCalculationOperation::searchDiag2(int x, int y, int dir, b while (x != end) { x += dir; y += dir; - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[1] == 0.0f) { *found = true; break; } - sample(m_imageReader, x + 1, y, e); + sample_image_fn_(x + 1, y, e); if (e[0] == 0.0f) { *found = true; return (dir > 0) ? 
x : x - dir; @@ -522,11 +750,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x, /* Fetch the crossing edges: */ int left = x - d1, bottom = y + d1; - sample(m_imageReader, left - 1, bottom, c); + sample_image_fn_(left - 1, bottom, c); if (c[1] > 0.0) { e1 += 2; } - sample(m_imageReader, left, bottom, c); + sample_image_fn_(left, bottom, c); if (c[0] > 0.0) { e1 += 1; } @@ -536,11 +764,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x, /* Fetch the crossing edges: */ int right = x + d2, top = y - d2; - sample(m_imageReader, right + 1, top, c); + sample_image_fn_(right + 1, top, c); if (c[1] > 0.0) { e2 += 2; } - sample(m_imageReader, right + 1, top - 1, c); + sample_image_fn_(right + 1, top - 1, c); if (c[0] > 0.0) { e2 += 1; } @@ -552,7 +780,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x, /* Search for the line ends: */ d1 = x - searchDiag2(x, y, -1, &d1_found); - sample(m_imageReader, x + 1, y, e); + sample_image_fn_(x + 1, y, e); if (e[0] > 0.0f) { d2 = searchDiag2(x, y, 1, &d2_found) - x; } @@ -568,11 +796,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x, /* Fetch the crossing edges: */ int left = x - d1, top = y - d1; - sample(m_imageReader, left - 1, top, c); + sample_image_fn_(left - 1, top, c); if (c[1] > 0.0) { e1 += 2; } - sample(m_imageReader, left, top - 1, c); + sample_image_fn_(left, top - 1, c); if (c[0] > 0.0) { e1 += 1; } @@ -582,7 +810,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x, /* Fetch the crossing edges: */ int right = x + d2, bottom = y + d2; - sample(m_imageReader, right + 1, bottom, c); + sample_image_fn_(right + 1, bottom, c); if (c[1] > 0.0) { e2 += 2; } @@ -610,7 +838,7 @@ bool SMAABlendingWeightCalculationOperation::isVerticalSearchUnneeded(int x, int } /* Search for the line ends: */ - sample(m_imageReader, x - 1, y, e); + sample_image_fn_(x - 1, y, e); if (e[1] > 0.0f) { d1 = x - 
searchDiag2(x - 1, y, -1, &found); } @@ -631,14 +859,14 @@ int SMAABlendingWeightCalculationOperation::searchXLeft(int x, int y) float e[4]; while (x > end) { - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[1] == 0.0f) { /* Is the edge not activated? */ break; } if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ return x; } - sample(m_imageReader, x, y - 1, e); + sample_image_fn_(x, y - 1, e); if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ return x; } @@ -655,12 +883,12 @@ int SMAABlendingWeightCalculationOperation::searchXRight(int x, int y) while (x < end) { x++; - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[1] == 0.0f || /* Is the edge not activated? */ e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ break; } - sample(m_imageReader, x, y - 1, e); + sample_image_fn_(x, y - 1, e); if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ break; } @@ -675,14 +903,14 @@ int SMAABlendingWeightCalculationOperation::searchYUp(int x, int y) float e[4]; while (y > end) { - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[0] == 0.0f) { /* Is the edge not activated? */ break; } if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ return y; } - sample(m_imageReader, x - 1, y, e); + sample_image_fn_(x - 1, y, e); if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ return y; } @@ -699,12 +927,12 @@ int SMAABlendingWeightCalculationOperation::searchYDown(int x, int y) while (y < end) { y++; - sample(m_imageReader, x, y, e); + sample_image_fn_(x, y, e); if (e[0] == 0.0f || /* Is the edge not activated? */ e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ break; } - sample(m_imageReader, x - 1, y, e); + sample_image_fn_(x - 1, y, e); if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? 
*/ break; } @@ -728,16 +956,16 @@ void SMAABlendingWeightCalculationOperation::detectHorizontalCornerPattern( /* Near the left corner */ if (d1 <= d2) { - sample(m_imageReader, left, y + 1, e); + sample_image_fn_(left, y + 1, e); factor[0] -= rounding * e[0]; - sample(m_imageReader, left, y - 2, e); + sample_image_fn_(left, y - 2, e); factor[1] -= rounding * e[0]; } /* Near the right corner */ if (d1 >= d2) { - sample(m_imageReader, right + 1, y + 1, e); + sample_image_fn_(right + 1, y + 1, e); factor[0] -= rounding * e[0]; - sample(m_imageReader, right + 1, y - 2, e); + sample_image_fn_(right + 1, y - 2, e); factor[1] -= rounding * e[0]; } @@ -757,16 +985,16 @@ void SMAABlendingWeightCalculationOperation::detectVerticalCornerPattern( /* Near the top corner */ if (d1 <= d2) { - sample(m_imageReader, x + 1, top, e); + sample_image_fn_(x + 1, top, e); factor[0] -= rounding * e[1]; - sample(m_imageReader, x - 2, top, e); + sample_image_fn_(x - 2, top, e); factor[1] -= rounding * e[1]; } /* Near the bottom corner */ if (d1 >= d2) { - sample(m_imageReader, x + 1, bottom + 1, e); + sample_image_fn_(x + 1, bottom + 1, e); factor[0] -= rounding * e[1]; - sample(m_imageReader, x - 2, bottom + 1, e); + sample_image_fn_(x - 2, bottom + 1, e); factor[1] -= rounding * e[1]; } @@ -847,6 +1075,59 @@ void SMAANeighborhoodBlendingOperation::executePixel(float output[4], madd_v4_v4fl(output, color2, weight2); } +void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &out_area, + Span inputs) +{ + MemoryBuffer *image1 = inputs[0]; + MemoryBuffer *image2 = inputs[1]; + for (BuffersIterator it = output->iterate_with({}, out_area); !it.is_end(); ++it) { + const float x = it.x; + const float y = it.y; + float w[4]; + + /* Fetch the blending weights for current pixel: */ + image2->read_elem_checked(x, y, w); + const float left = w[2], top = w[0]; + image2->read_elem_checked(x + 1, y, w); + const float right = w[3]; + 
image2->read_elem_checked(x, y + 1, w); + const float bottom = w[1]; + + /* Is there any blending weight with a value greater than 0.0? */ + if (right + bottom + left + top < 1e-5f) { + image1->read_elem_checked(x, y, it.out); + continue; + } + + /* Calculate the blending offsets: */ + void (*sample_fn)(MemoryBuffer * reader, int x, int y, float xoffset, float color[4]); + float offset1, offset2, weight1, weight2, color1[4], color2[4]; + + if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */ + sample_fn = sample_bilinear_horizontal; + offset1 = right; + offset2 = -left; + weight1 = right / (right + left); + weight2 = left / (right + left); + } + else { + sample_fn = sample_bilinear_vertical; + offset1 = bottom; + offset2 = -top; + weight1 = bottom / (bottom + top); + weight2 = top / (bottom + top); + } + + /* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */ + sample_fn(image1, x, y, offset1, color1); + sample_fn(image1, x, y, offset2, color2); + + mul_v4_v4fl(it.out, color1, weight1); + madd_v4_v4fl(it.out, color2, weight2); + } +} + void SMAANeighborhoodBlendingOperation::deinitExecution() { this->m_image1Reader = nullptr; @@ -866,4 +1147,12 @@ bool SMAANeighborhoodBlendingOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int UNUSED(input_idx), + const rcti &output_area, + rcti &r_input_area) +{ + r_input_area = output_area; + expand_area_for_sampler(r_input_area, PixelSampler::Bilinear); +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h index 781762202b4..91b9299ee43 100644 --- a/source/blender/compositor/operations/COM_SMAAOperation.h +++ b/source/blender/compositor/operations/COM_SMAAOperation.h @@ -20,14 +20,14 @@ #pragma once 
-#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" namespace blender::compositor { /*-----------------------------------------------------------------------------*/ /* Edge Detection (First Pass) */ -class SMAAEdgeDetectionOperation : public NodeOperation { +class SMAAEdgeDetectionOperation : public MultiThreadedOperation { protected: SocketReader *m_imageReader; SocketReader *m_valueReader; @@ -60,15 +60,20 @@ class SMAAEdgeDetectionOperation : public NodeOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; /*-----------------------------------------------------------------------------*/ /* Blending Weight Calculation (Second Pass) */ -class SMAABlendingWeightCalculationOperation : public NodeOperation { +class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation { private: SocketReader *m_imageReader; - + std::function sample_image_fn_; int m_corner_rounding; public: @@ -96,6 +101,14 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation { ReadBufferOperation *readOperation, rcti *output) override; + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_started(MemoryBuffer *output, + const rcti &area, + Span inputs) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span inputs) override; + private: /* Diagonal Search Functions */ int searchDiag1(int x, int y, int dir, bool *found); @@ -117,7 +130,7 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation { /*-----------------------------------------------------------------------------*/ /* Neighborhood Blending (Third Pass) */ -class 
SMAANeighborhoodBlendingOperation : public NodeOperation { +class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation { private: SocketReader *m_image1Reader; SocketReader *m_image2Reader; @@ -144,6 +157,11 @@ class SMAANeighborhoodBlendingOperation : public NodeOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.cc b/source/blender/compositor/operations/COM_VectorBlurOperation.cc index df65044afc1..5405e6d424a 100644 --- a/source/blender/compositor/operations/COM_VectorBlurOperation.cc +++ b/source/blender/compositor/operations/COM_VectorBlurOperation.cc @@ -57,6 +57,7 @@ VectorBlurOperation::VectorBlurOperation() this->m_inputSpeedProgram = nullptr; this->m_inputZProgram = nullptr; flags.complex = true; + flags.is_fullframe_operation = true; } void VectorBlurOperation::initExecution() { @@ -121,6 +122,51 @@ bool VectorBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/, return false; } +void VectorBlurOperation::get_area_of_interest(const int UNUSED(input_idx), + const rcti &UNUSED(output_area), + rcti &r_input_area) +{ + r_input_area.xmin = 0; + r_input_area.xmax = this->getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = this->getHeight(); +} + +void VectorBlurOperation::update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. */ + if (!m_cachedInstance) { + MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX]; + const bool is_image_inflated = image->is_a_single_elem(); + image = is_image_inflated ? 
image->inflate() : image; + /* Must be a copy because it's modified in #generateVectorBlur. */ + MemoryBuffer *speed = inputs[SPEED_INPUT_INDEX]; + speed = speed->is_a_single_elem() ? speed->inflate() : new MemoryBuffer(*speed); + + MemoryBuffer *z = inputs[Z_INPUT_INDEX]; + const bool is_z_inflated = z->is_a_single_elem(); + z = is_z_inflated ? z->inflate() : z; + + m_cachedInstance = (float *)MEM_dupallocN(image->getBuffer()); + this->generateVectorBlur(m_cachedInstance, image, speed, z); + + if (is_image_inflated) { + delete image; + } + delete speed; + if (is_z_inflated) { + delete z; + } + } + + const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType()); + MemoryBuffer buf(m_cachedInstance, num_channels, this->getWidth(), this->getHeight()); + output->copy_from(&buf, area); +} + void VectorBlurOperation::generateVectorBlur(float *data, MemoryBuffer *inputImage, MemoryBuffer *inputSpeed, diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.h b/source/blender/compositor/operations/COM_VectorBlurOperation.h index dfcf1fb16f7..c30c150db3c 100644 --- a/source/blender/compositor/operations/COM_VectorBlurOperation.h +++ b/source/blender/compositor/operations/COM_VectorBlurOperation.h @@ -26,6 +26,10 @@ namespace blender::compositor { class VectorBlurOperation : public NodeOperation, public QualityStepHelper { private: + static constexpr int IMAGE_INPUT_INDEX = 0; + static constexpr int Z_INPUT_INDEX = 1; + static constexpr int SPEED_INPUT_INDEX = 2; + /** * \brief Cached reference to the inputProgram */ @@ -68,6 +72,13 @@ class VectorBlurOperation : public NodeOperation, public QualityStepHelper { ReadBufferOperation *readOperation, rcti *output) override; + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) override; + void update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; + protected: void generateVectorBlur(float *data, MemoryBuffer 
*inputImage, -- cgit v1.2.3