Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Manuel Castilla <manzanillawork@gmail.com>, 2021-09-04 16:23:28 +0300
committer: Manuel Castilla <manzanillawork@gmail.com>, 2021-09-04 18:05:58 +0300
commit: 9d7cb5c4a1158266d2f8caa1fc19be2a00fdf101 (patch)
tree: 594efdc892cd171767fb6f207bfd540adf2cbc3b
parent: 9290b41381fdf02d1431b216de6477f93b2897cb (diff)
Compositor: Full frame filter nodes
Adds full frame implementation to Anti-Aliasing, Defocus, Denoise, Despeckle, Dilate/Erode, Directional Blur, Filter, Inpaint and Vector Blur nodes. The other nodes in "Filter" sub-menu are submitted separately.

Part of T88150.

Reviewed By: jbakker

Differential Revision: https://developer.blender.org/D12219
-rw-r--r-- source/blender/compositor/intern/COM_MemoryBuffer.h | 6
-rw-r--r-- source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc | 27
-rw-r--r-- source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h | 8
-rw-r--r-- source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc | 77
-rw-r--r-- source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h | 4
-rw-r--r-- source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc | 58
-rw-r--r-- source/blender/compositor/operations/COM_ConvolutionFilterOperation.h | 13
-rw-r--r-- source/blender/compositor/operations/COM_DenoiseOperation.cc | 100
-rw-r--r-- source/blender/compositor/operations/COM_DenoiseOperation.h | 15
-rw-r--r-- source/blender/compositor/operations/COM_DespeckleOperation.cc | 107
-rw-r--r-- source/blender/compositor/operations/COM_DespeckleOperation.h | 12
-rw-r--r-- source/blender/compositor/operations/COM_DilateErodeOperation.cc | 355
-rw-r--r-- source/blender/compositor/operations/COM_DilateErodeOperation.h | 40
-rw-r--r-- source/blender/compositor/operations/COM_DirectionalBlurOperation.cc | 54
-rw-r--r-- source/blender/compositor/operations/COM_DirectionalBlurOperation.h | 9
-rw-r--r-- source/blender/compositor/operations/COM_InpaintOperation.cc | 44
-rw-r--r-- source/blender/compositor/operations/COM_InpaintOperation.h | 7
-rw-r--r-- source/blender/compositor/operations/COM_SMAAOperation.cc | 355
-rw-r--r-- source/blender/compositor/operations/COM_SMAAOperation.h | 28
-rw-r--r-- source/blender/compositor/operations/COM_VectorBlurOperation.cc | 46
-rw-r--r-- source/blender/compositor/operations/COM_VectorBlurOperation.h | 11
21 files changed, 1286 insertions, 90 deletions
diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.h b/source/blender/compositor/intern/COM_MemoryBuffer.h
index f3e15c2a495..f730d53acec 100644
--- a/source/blender/compositor/intern/COM_MemoryBuffer.h
+++ b/source/blender/compositor/intern/COM_MemoryBuffer.h
@@ -373,6 +373,12 @@ class MemoryBuffer {
return this->m_buffer;
}
+ float *release_ownership_buffer()
+ { /* Clears the ownership flag of this object and returns the raw data pointer. */
+ owns_data_ = false; /* NOTE(review): presumably the caller must free the buffer from now on — confirm. */
+ return this->m_buffer;
+ }
+
MemoryBuffer *inflate() const;
inline void wrap_pixel(int &x, int &y, MemoryBufferExtend extend_x, MemoryBufferExtend extend_y)
diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
index a9c58b55d73..405ba03abf3 100644
--- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
@@ -116,4 +116,31 @@ void ConvertDepthToRadiusOperation::deinitExecution()
this->m_inputOperation = nullptr;
}
+void ConvertDepthToRadiusOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame path: for each element the iterator visits over `area`, reads the depth from
+ * input 0 and writes the corresponding blur radius to `output`. */
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ const float z = *it.in(0);
+ if (z == 0.0f) { /* Avoids the division by zero below; a zero depth gives a zero radius. */
+ *it.out = 0.0f;
+ continue;
+ }
+
+ const float inv_z = (1.0f / z);
+
+ /* Bug T6656 part 2b, do not re-scale. */
+#if 0
+ bcrad = 0.5f * fabs(aperture * (dof_sp * (cam_invfdist - iZ) - 1.0f));
+ /* Scale crad back to original maximum and blend:
+ * `crad->rect[px] = bcrad + wts->rect[px] * (scf * crad->rect[px] - bcrad);` */
+#endif
+ const float radius = 0.5f *
+ fabsf(m_aperture * (m_dof_sp * (m_inverseFocalDistance - inv_z) - 1.0f));
+ /* Bug T6615, limit minimum radius to 1 pixel,
+ * not really a solution, but somewhat mitigates the problem.
+ * NOTE(review): the clamp below uses 0.0f as lower bound, not 1 pixel as stated above —
+ * confirm which one is intended. */
+ *it.out = CLAMPIS(radius, 0.0f, m_maxRadius);
+ }
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
index 1f4e856b128..3d163843d06 100644
--- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
+++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
@@ -19,7 +19,7 @@
#pragma once
#include "COM_FastGaussianBlurOperation.h"
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
#include "DNA_object_types.h"
namespace blender::compositor {
@@ -28,7 +28,7 @@ namespace blender::compositor {
* this program converts an input color to an output value.
* it assumes we are in sRGB color space.
*/
-class ConvertDepthToRadiusOperation : public NodeOperation {
+class ConvertDepthToRadiusOperation : public MultiThreadedOperation {
private:
/**
* Cached reference to the inputProgram
@@ -83,6 +83,10 @@ class ConvertDepthToRadiusOperation : public NodeOperation {
{
this->m_blurPostOperation = operation;
}
+
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
index 5ead300a368..9127a871b04 100644
--- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
@@ -95,4 +95,81 @@ void ConvolutionEdgeFilterOperation::executePixel(float output[4], int x, int y,
output[3] = MAX2(output[3], 0.0f);
}
+void ConvolutionEdgeFilterOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame edge filter: per element, RGB becomes the magnitude of two 3x3 kernel responses,
+ * blended with the source color by the factor input; alpha is copied from the source. */
+ const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+ const int last_x = getWidth() - 1;
+ const int last_y = getHeight() - 1;
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ const int left_offset = (it.x == 0) ? 0 : -image->elem_stride;
+ const int right_offset = (it.x == last_x) ? 0 : image->elem_stride;
+ const int down_offset = (it.y == 0) ? 0 : -image->row_stride;
+ const int up_offset = (it.y == last_y) ? 0 : image->row_stride;
+ /* The offsets above are 0 on the matching border, so the center row/column is reused there. */
+ const float *center_color = it.in(IMAGE_INPUT_INDEX);
+ float res1[4] = {0};
+ float res2[4] = {0};
+ /* res1 applies m_filter as stored; res2 applies it with rows and columns swapped. */
+ const float *color = center_color + down_offset + left_offset;
+ madd_v3_v3fl(res1, color, m_filter[0]);
+ copy_v3_v3(res2, res1);
+
+ color = center_color + down_offset;
+ madd_v3_v3fl(res1, color, m_filter[1]);
+ madd_v3_v3fl(res2, color, m_filter[3]);
+
+ color = center_color + down_offset + right_offset;
+ madd_v3_v3fl(res1, color, m_filter[2]);
+ madd_v3_v3fl(res2, color, m_filter[6]);
+
+ color = center_color + left_offset;
+ madd_v3_v3fl(res1, color, m_filter[3]);
+ madd_v3_v3fl(res2, color, m_filter[1]);
+
+ {
+ float rgb_filtered[3];
+ mul_v3_v3fl(rgb_filtered, center_color, m_filter[4]);
+ add_v3_v3(res1, rgb_filtered);
+ add_v3_v3(res2, rgb_filtered);
+ }
+
+ color = center_color + right_offset;
+ madd_v3_v3fl(res1, color, m_filter[5]);
+ madd_v3_v3fl(res2, color, m_filter[7]);
+
+ color = center_color + up_offset + left_offset;
+ madd_v3_v3fl(res1, color, m_filter[6]);
+ madd_v3_v3fl(res2, color, m_filter[2]);
+
+ color = center_color + up_offset;
+ madd_v3_v3fl(res1, color, m_filter[7]);
+ madd_v3_v3fl(res2, color, m_filter[5]);
+
+ {
+ color = center_color + up_offset + right_offset;
+ float rgb_filtered[3];
+ mul_v3_v3fl(rgb_filtered, color, m_filter[8]);
+ add_v3_v3(res1, rgb_filtered);
+ add_v3_v3(res2, rgb_filtered);
+ }
+ /* Combine both responses per channel as sqrt(res1^2 + res2^2). */
+ it.out[0] = sqrt(res1[0] * res1[0] + res2[0] * res2[0]);
+ it.out[1] = sqrt(res1[1] * res1[1] + res2[1] * res2[1]);
+ it.out[2] = sqrt(res1[2] * res1[2] + res2[2] * res2[2]);
+ /* Blend the filtered RGB with the source color: factor 1 is fully filtered, 0 is the source. */
+ const float factor = *it.in(FACTOR_INPUT_INDEX);
+ const float m_factor = 1.0f - factor;
+ it.out[0] = it.out[0] * factor + center_color[0] * m_factor;
+ it.out[1] = it.out[1] * factor + center_color[1] * m_factor;
+ it.out[2] = it.out[2] * factor + center_color[2] * m_factor;
+ /* Alpha is passed through unfiltered. */
+ it.out[3] = center_color[3];
+
+ /* Make sure we don't return negative color. */
+ CLAMP4_MIN(it.out, 0.0f);
+ }
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
index 319b424bd4a..bd38e27165a 100644
--- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
+++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
@@ -25,6 +25,10 @@ namespace blender::compositor {
class ConvolutionEdgeFilterOperation : public ConvolutionFilterOperation {
public:
void executePixel(float output[4], int x, int y, void *data) override;
+ /* Full-frame implementation: fills `area` of `output` from the buffers in `inputs`. */
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
index 72cbbf4283a..11a077229fd 100644
--- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
@@ -127,4 +127,62 @@ bool ConvolutionFilterOperation::determineDependingAreaOfInterest(
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+void ConvolutionFilterOperation::get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area)
+{ /* Writes to `r_input_area` the region of input `input_idx` requested for `output_area`.
+ * `r_input_area` is left untouched for any index other than the two cases below. */
+ switch (input_idx) {
+ case IMAGE_INPUT_INDEX: { /* Image: the output area grown by a filter-size margin per side. */
+ const int add_x = (m_filterWidth - 1) / 2 + 1;
+ const int add_y = (m_filterHeight - 1) / 2 + 1;
+ r_input_area.xmin = output_area.xmin - add_x;
+ r_input_area.xmax = output_area.xmax + add_x;
+ r_input_area.ymin = output_area.ymin - add_y;
+ r_input_area.ymax = output_area.ymax + add_y;
+ break;
+ }
+ case FACTOR_INPUT_INDEX: { /* Factor: same area as the output. */
+ r_input_area = output_area;
+ break;
+ }
+ }
+}
+
+void ConvolutionFilterOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame 3x3 convolution: per element, sums the nine neighbors of the image input
+ * weighted by m_filter[0..8] (all four channels), then blends with the source by the factor. */
+ const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+ const int last_x = getWidth() - 1;
+ const int last_y = getHeight() - 1;
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ const int left_offset = (it.x == 0) ? 0 : -image->elem_stride;
+ const int right_offset = (it.x == last_x) ? 0 : image->elem_stride;
+ const int down_offset = (it.y == 0) ? 0 : -image->row_stride;
+ const int up_offset = (it.y == last_y) ? 0 : image->row_stride;
+ /* The offsets above are 0 on the matching border, so the center row/column is reused there. */
+ const float *center_color = it.in(IMAGE_INPUT_INDEX);
+ zero_v4(it.out);
+ madd_v4_v4fl(it.out, center_color + down_offset + left_offset, m_filter[0]);
+ madd_v4_v4fl(it.out, center_color + down_offset, m_filter[1]);
+ madd_v4_v4fl(it.out, center_color + down_offset + right_offset, m_filter[2]);
+ madd_v4_v4fl(it.out, center_color + left_offset, m_filter[3]);
+ madd_v4_v4fl(it.out, center_color, m_filter[4]);
+ madd_v4_v4fl(it.out, center_color + right_offset, m_filter[5]);
+ madd_v4_v4fl(it.out, center_color + up_offset + left_offset, m_filter[6]);
+ madd_v4_v4fl(it.out, center_color + up_offset, m_filter[7]);
+ madd_v4_v4fl(it.out, center_color + up_offset + right_offset, m_filter[8]);
+ /* Blend the filtered color with the source: factor 1 is fully filtered, 0 is the source. */
+ const float factor = *it.in(FACTOR_INPUT_INDEX);
+ const float m_factor = 1.0f - factor;
+ it.out[0] = it.out[0] * factor + center_color[0] * m_factor;
+ it.out[1] = it.out[1] * factor + center_color[1] * m_factor;
+ it.out[2] = it.out[2] * factor + center_color[2] * m_factor;
+ it.out[3] = it.out[3] * factor + center_color[3] * m_factor;
+
+ /* Make sure we don't return negative color. */
+ CLAMP4_MIN(it.out, 0.0f);
+ }
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
index 16dee502929..7e12c7faa5c 100644
--- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
+++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
@@ -18,11 +18,15 @@
#pragma once
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
namespace blender::compositor {
-class ConvolutionFilterOperation : public NodeOperation {
+class ConvolutionFilterOperation : public MultiThreadedOperation {
+ protected:
+ static constexpr int IMAGE_INPUT_INDEX = 0;
+ static constexpr int FACTOR_INPUT_INDEX = 1;
+
private:
int m_filterWidth;
int m_filterHeight;
@@ -43,6 +47,11 @@ class ConvolutionFilterOperation : public NodeOperation {
void initExecution() override;
void deinitExecution() override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+ virtual void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.cc b/source/blender/compositor/operations/COM_DenoiseOperation.cc
index ec11ad4d69a..e7f2d5a740a 100644
--- a/source/blender/compositor/operations/COM_DenoiseOperation.cc
+++ b/source/blender/compositor/operations/COM_DenoiseOperation.cc
@@ -35,6 +35,8 @@ DenoiseOperation::DenoiseOperation()
this->addInputSocket(DataType::Color);
this->addOutputSocket(DataType::Color);
this->m_settings = nullptr;
+ flags.is_fullframe_operation = true;
+ output_rendered_ = false;
}
void DenoiseOperation::initExecution()
{
@@ -63,8 +65,7 @@ MemoryBuffer *DenoiseOperation::createMemoryBuffer(rcti *rect2)
rect.xmax = getWidth();
rect.ymax = getHeight();
MemoryBuffer *result = new MemoryBuffer(DataType::Color, rect);
- float *data = result->getBuffer();
- this->generateDenoise(data, tileColor, tileNormal, tileAlbedo, this->m_settings);
+ this->generateDenoise(result, tileColor, tileNormal, tileAlbedo, this->m_settings);
return result;
}
@@ -84,23 +85,33 @@ bool DenoiseOperation::determineDependingAreaOfInterest(rcti * /*input*/,
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
-void DenoiseOperation::generateDenoise(float *data,
- MemoryBuffer *inputTileColor,
- MemoryBuffer *inputTileNormal,
- MemoryBuffer *inputTileAlbedo,
+void DenoiseOperation::generateDenoise(MemoryBuffer *output,
+ MemoryBuffer *input_color,
+ MemoryBuffer *input_normal,
+ MemoryBuffer *input_albedo,
NodeDenoise *settings)
{
- float *inputBufferColor = inputTileColor->getBuffer();
- BLI_assert(inputBufferColor);
- if (!inputBufferColor) {
+ BLI_assert(input_color->getBuffer());
+ if (!input_color->getBuffer()) {
return;
}
+
#ifdef WITH_OPENIMAGEDENOISE
/* Always supported through Accelerate framework BNNS on macOS. */
# ifndef __APPLE__
if (BLI_cpu_support_sse41())
# endif
{
+ /* OpenImageDenoise needs full buffers. */
+ MemoryBuffer *buf_color = input_color->is_a_single_elem() ? input_color->inflate() :
+ input_color;
+ MemoryBuffer *buf_normal = input_normal && input_normal->is_a_single_elem() ?
+ input_normal->inflate() :
+ input_normal;
+ MemoryBuffer *buf_albedo = input_albedo && input_albedo->is_a_single_elem() ?
+ input_albedo->inflate() :
+ input_albedo;
+
/* Since it's memory intensive, it's better to run only one instance of OIDN at a time.
* OpenImageDenoise is multithreaded internally and should use all available cores nonetheless.
*/
@@ -111,35 +122,35 @@ void DenoiseOperation::generateDenoise(float *data,
oidn::FilterRef filter = device.newFilter("RT");
filter.setImage("color",
- inputBufferColor,
+ buf_color->getBuffer(),
oidn::Format::Float3,
- inputTileColor->getWidth(),
- inputTileColor->getHeight(),
+ buf_color->getWidth(),
+ buf_color->getHeight(),
0,
sizeof(float[4]));
- if (inputTileNormal && inputTileNormal->getBuffer()) {
+ if (buf_normal && buf_normal->getBuffer()) {
filter.setImage("normal",
- inputTileNormal->getBuffer(),
+ buf_normal->getBuffer(),
oidn::Format::Float3,
- inputTileNormal->getWidth(),
- inputTileNormal->getHeight(),
+ buf_normal->getWidth(),
+ buf_normal->getHeight(),
0,
sizeof(float[3]));
}
- if (inputTileAlbedo && inputTileAlbedo->getBuffer()) {
+ if (buf_albedo && buf_albedo->getBuffer()) {
filter.setImage("albedo",
- inputTileAlbedo->getBuffer(),
+ buf_albedo->getBuffer(),
oidn::Format::Float3,
- inputTileAlbedo->getWidth(),
- inputTileAlbedo->getHeight(),
+ buf_albedo->getWidth(),
+ buf_albedo->getHeight(),
0,
sizeof(float[4]));
}
filter.setImage("output",
- data,
+ output->getBuffer(),
oidn::Format::Float3,
- inputTileColor->getWidth(),
- inputTileColor->getHeight(),
+ buf_color->getWidth(),
+ buf_color->getHeight(),
0,
sizeof(float[4]));
@@ -153,19 +164,46 @@ void DenoiseOperation::generateDenoise(float *data,
filter.execute();
BLI_mutex_unlock(&oidn_lock);
- /* copy the alpha channel, OpenImageDenoise currently only supports RGB */
- size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight();
- for (size_t i = 0; i < numPixels; i++) {
- data[i * 4 + 3] = inputBufferColor[i * 4 + 3];
+ /* Copy the alpha channel, OpenImageDenoise currently only supports RGB. */
+ output->copy_from(input_color, input_color->get_rect(), 3, COM_DATA_TYPE_VALUE_CHANNELS, 3);
+
+ /* Delete inflated buffers. */
+ if (input_color->is_a_single_elem()) {
+ delete buf_color;
+ }
+ if (input_normal && input_normal->is_a_single_elem()) {
+ delete buf_normal;
}
+ if (input_albedo && input_albedo->is_a_single_elem()) {
+ delete buf_albedo;
+ }
+
return;
}
#endif
/* If built without OIDN or running on an unsupported CPU, just pass through. */
- UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings);
- ::memcpy(data,
- inputBufferColor,
- sizeof(float[4]) * inputTileColor->getWidth() * inputTileColor->getHeight());
+ UNUSED_VARS(input_albedo, input_normal, settings);
+ output->copy_from(input_color, input_color->get_rect());
+}
+
+void DenoiseOperation::get_area_of_interest(const int UNUSED(input_idx),
+ const rcti &UNUSED(output_area),
+ rcti &r_input_area)
+{ /* Requests the same area for every input, (0, 0) to (getWidth(), getHeight()),
+ * regardless of the output area being computed. */
+ r_input_area.xmin = 0;
+ r_input_area.xmax = this->getWidth();
+ r_input_area.ymin = 0;
+ r_input_area.ymax = this->getHeight();
+}
+
+void DenoiseOperation::update_memory_buffer(MemoryBuffer *output,
+ const rcti &UNUSED(area),
+ Span<MemoryBuffer *> inputs)
+{ /* Runs generateDenoise() on the first call only; later calls return without touching `output`.
+ * Inputs 0, 1 and 2 are passed as the color, normal and albedo buffers respectively. */
+ if (!output_rendered_) {
+ this->generateDenoise(output, inputs[0], inputs[1], inputs[2], m_settings);
+ output_rendered_ = true;
+ }
}
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.h b/source/blender/compositor/operations/COM_DenoiseOperation.h
index a9298c17e92..48209c3eacf 100644
--- a/source/blender/compositor/operations/COM_DenoiseOperation.h
+++ b/source/blender/compositor/operations/COM_DenoiseOperation.h
@@ -37,6 +37,8 @@ class DenoiseOperation : public SingleThreadedOperation {
*/
NodeDenoise *m_settings;
+ bool output_rendered_;
+
public:
DenoiseOperation();
/**
@@ -57,11 +59,16 @@ class DenoiseOperation : public SingleThreadedOperation {
ReadBufferOperation *readOperation,
rcti *output) override;
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
+
protected:
- void generateDenoise(float *data,
- MemoryBuffer *inputTileColor,
- MemoryBuffer *inputTileNormal,
- MemoryBuffer *inputTileAlbedo,
+ void generateDenoise(MemoryBuffer *output,
+ MemoryBuffer *input_color,
+ MemoryBuffer *input_normal,
+ MemoryBuffer *input_albedo,
NodeDenoise *settings);
MemoryBuffer *createMemoryBuffer(rcti *rect) override;
diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.cc b/source/blender/compositor/operations/COM_DespeckleOperation.cc
index fc8778c7d2e..19bd7b2af6f 100644
--- a/source/blender/compositor/operations/COM_DespeckleOperation.cc
+++ b/source/blender/compositor/operations/COM_DespeckleOperation.cc
@@ -127,6 +127,11 @@ void DespeckleOperation::executePixel(float output[4], int x, int y, void * /*da
else {
copy_v4_v4(output, color_org);
}
+
+#undef TOT_DIV_ONE
+#undef TOT_DIV_CNR
+#undef WTOT
+#undef COLOR_ADD
}
bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input,
@@ -144,4 +149,106 @@ bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input,
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+void DespeckleOperation::get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area)
+{ /* Writes to `r_input_area` the region of input `input_idx` requested for `output_area`.
+ * `r_input_area` is left untouched for any index other than the two cases below. */
+ switch (input_idx) {
+ case IMAGE_INPUT_INDEX: { /* Image: the output area grown by a fixed margin per side. */
+ const int add_x = 2; //(this->m_filterWidth - 1) / 2 + 1; (formula left commented out, fixed margin used)
+ const int add_y = 2; //(this->m_filterHeight - 1) / 2 + 1; (formula left commented out, fixed margin used)
+ r_input_area.xmin = output_area.xmin - add_x;
+ r_input_area.xmax = output_area.xmax + add_x;
+ r_input_area.ymin = output_area.ymin - add_y;
+ r_input_area.ymax = output_area.ymax + add_y;
+ break;
+ }
+ case FACTOR_INPUT_INDEX: { /* Factor: same area as the output. */
+ r_input_area = output_area;
+ break;
+ }
+ }
+}
+
+void DespeckleOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame despeckle: per element, either keeps the source color or blends it with a
+ * weighted average of selected 3x3 neighbors, depending on the tests at the end of the loop. */
+ const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+ const int last_x = getWidth() - 1;
+ const int last_y = getHeight() - 1;
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ const int x1 = MAX2(it.x - 1, 0);
+ const int x2 = it.x;
+ const int x3 = MIN2(it.x + 1, last_x);
+ const int y1 = MAX2(it.y - 1, 0);
+ const int y2 = it.y;
+ const int y3 = MIN2(it.y + 1, last_y);
+ /* (x1..x3, y1..y3) are the 3x3 neighborhood coordinates, clamped to [0, last_x] x [0, last_y]. */
+ float w = 0.0f;
+ const float *color_org = it.in(IMAGE_INPUT_INDEX);
+ float color_mid[4];
+ float color_mid_ok[4];
+ const float *in1 = nullptr;
+ /* Neighbor weights: 1 for edge-adjacent elements, sqrt(1/2) for diagonal ones. */
+#define TOT_DIV_ONE 1.0f
+#define TOT_DIV_CNR (float)M_SQRT1_2
+
+#define WTOT (TOT_DIV_ONE * 4 + TOT_DIV_CNR * 4)
+ /* COLOR_ADD: always adds `in1 * fac` to color_mid; adds it to color_mid_ok (and `fac` to `w`)
+ * only when color_diff(in1, color_org, m_threshold) is true. */
+#define COLOR_ADD(fac) \
+ { \
+ madd_v4_v4fl(color_mid, in1, fac); \
+ if (color_diff(in1, color_org, m_threshold)) { \
+ w += fac; \
+ madd_v4_v4fl(color_mid_ok, in1, fac); \
+ } \
+ }
+
+ zero_v4(color_mid);
+ zero_v4(color_mid_ok);
+
+ in1 = image->get_elem(x1, y1);
+ COLOR_ADD(TOT_DIV_CNR)
+ in1 = image->get_elem(x2, y1);
+ COLOR_ADD(TOT_DIV_ONE)
+ in1 = image->get_elem(x3, y1);
+ COLOR_ADD(TOT_DIV_CNR)
+ in1 = image->get_elem(x1, y2);
+ COLOR_ADD(TOT_DIV_ONE)
+
+#if 0
+ const float* in2 = image->get_elem(x2, y2);
+ madd_v4_v4fl(color_mid, in2, this->m_filter[4]);
+#endif
+
+ in1 = image->get_elem(x3, y2);
+ COLOR_ADD(TOT_DIV_ONE)
+ in1 = image->get_elem(x1, y3);
+ COLOR_ADD(TOT_DIV_CNR)
+ in1 = image->get_elem(x2, y3);
+ COLOR_ADD(TOT_DIV_ONE)
+ in1 = image->get_elem(x3, y3);
+ COLOR_ADD(TOT_DIV_CNR)
+ /* Normalize color_mid by the total neighbor weight (same value as WTOT). */
+ mul_v4_fl(color_mid, 1.0f / (4.0f + (4.0f * (float)M_SQRT1_2)));
+ // mul_v4_fl(color_mid, 1.0f / w);
+ /* Blend only if some neighbor was accepted, the accepted weight ratio exceeds
+ * m_threshold_neighbor and color_diff() holds for the average; else pass the source through. */
+ if ((w != 0.0f) && ((w / WTOT) > (m_threshold_neighbor)) &&
+ color_diff(color_mid, color_org, m_threshold)) {
+ const float factor = *it.in(FACTOR_INPUT_INDEX);
+ mul_v4_fl(color_mid_ok, 1.0f / w);
+ interp_v4_v4v4(it.out, color_org, color_mid_ok, factor);
+ }
+ else {
+ copy_v4_v4(it.out, color_org);
+ }
+
+#undef TOT_DIV_ONE
+#undef TOT_DIV_CNR
+#undef WTOT
+#undef COLOR_ADD
+ }
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.h b/source/blender/compositor/operations/COM_DespeckleOperation.h
index e8d3461d2ec..70d6c2227f4 100644
--- a/source/blender/compositor/operations/COM_DespeckleOperation.h
+++ b/source/blender/compositor/operations/COM_DespeckleOperation.h
@@ -18,12 +18,15 @@
#pragma once
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
namespace blender::compositor {
-class DespeckleOperation : public NodeOperation {
+class DespeckleOperation : public MultiThreadedOperation {
private:
+ constexpr static int IMAGE_INPUT_INDEX = 0;
+ constexpr static int FACTOR_INPUT_INDEX = 1;
+
float m_threshold;
float m_threshold_neighbor;
@@ -52,6 +55,11 @@ class DespeckleOperation : public NodeOperation {
void initExecution() override;
void deinitExecution() override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.cc b/source/blender/compositor/operations/COM_DilateErodeOperation.cc
index c459d09f02c..28b40021cd9 100644
--- a/source/blender/compositor/operations/COM_DilateErodeOperation.cc
+++ b/source/blender/compositor/operations/COM_DilateErodeOperation.cc
@@ -35,9 +35,9 @@ DilateErodeThresholdOperation::DilateErodeThresholdOperation()
this->m__switch = 0.5f;
this->m_distance = 0.0f;
}
-void DilateErodeThresholdOperation::initExecution()
+
+void DilateErodeThresholdOperation::init_data()
{
- this->m_inputProgram = this->getInputSocketReader(0);
if (this->m_distance < 0.0f) {
this->m_scope = -this->m_distance + this->m_inset;
}
@@ -54,6 +54,11 @@ void DilateErodeThresholdOperation::initExecution()
}
}
+void DilateErodeThresholdOperation::initExecution()
+{ /* Only caches the reader of input socket 0; the scope computation lives in init_data(). */
+ this->m_inputProgram = this->getInputSocketReader(0);
+}
+
void *DilateErodeThresholdOperation::initializeTileData(rcti * /*rect*/)
{
void *buffer = this->m_inputProgram->initializeTileData(nullptr);
@@ -160,6 +165,112 @@ bool DilateErodeThresholdOperation::determineDependingAreaOfInterest(
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+void DilateErodeThresholdOperation::get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area)
+{ /* The single input (index 0) is requested over the output area grown by `m_scope` per side. */
+ BLI_assert(input_idx == 0);
+ UNUSED_VARS_NDEBUG(input_idx);
+ r_input_area.xmin = output_area.xmin - m_scope;
+ r_input_area.xmax = output_area.xmax + m_scope;
+ r_input_area.ymin = output_area.ymin - m_scope;
+ r_input_area.ymax = output_area.ymax + m_scope;
+}
+
+struct DilateErodeThresholdOperation::PixelData {
+ int x; /* Coordinates of the element being computed (set from the buffers iterator). */
+ int y;
+ int xmin; /* Search window around (x, y), clamped to the input rect; max bounds are exclusive. */
+ int xmax;
+ int ymin;
+ int ymax;
+ const float *elem; /* Input element at (x, y). */
+ float distance; /* Squared distance get_min_distance() starts from and returns if nothing is closer. */
+ int elem_stride; /* Strides of the input buffer, used to step from `elem` to its neighbors. */
+ int row_stride;
+ /** Switch: the threshold (set from m__switch) that input values are compared against. */
+ float sw;
+};
+
+template<template<typename> typename TCompare>
+static float get_min_distance(DilateErodeThresholdOperation::PixelData &p)
+{ /* Returns the smallest squared distance from (p.x, p.y) to a window element for which
+ * `TCompare(element, p.sw)` is true, or `p.distance` when no matching element is closer.
+ * The window is [p.xmin, p.xmax) x [p.ymin, p.ymax). */
+ /* TODO(manzanilla): bad performance, generate a table with relative offsets on operation
+ * initialization to loop from less to greater distance and break as soon as #compare is
+ * true. */
+ const TCompare compare;
+ float min_dist = p.distance;
+ const float *row = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride +
+ ((intptr_t)p.xmin - p.x) * p.elem_stride;
+ for (int yi = p.ymin; yi < p.ymax; yi++) {
+ const float dy = yi - p.y;
+ const float dist_y = dy * dy;
+ const float *elem = row;
+ for (int xi = p.xmin; xi < p.xmax; xi++) {
+ if (compare(*elem, p.sw)) {
+ const float dx = xi - p.x;
+ const float dist = dx * dx + dist_y;
+ min_dist = MIN2(min_dist, dist);
+ }
+ elem += p.elem_stride;
+ }
+ row += p.row_stride;
+ }
+ return min_dist;
+}
+
+void DilateErodeThresholdOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame path: per element, computes a signed distance to the nearest element on the
+ * other side of the switch value and maps it to an output value using m_distance and m_inset. */
+ const MemoryBuffer *input = inputs[0];
+ const rcti &input_rect = input->get_rect();
+ const float rd = m_scope * m_scope;
+ const float inset = m_inset;
+ /* Fields shared by all elements: threshold, fallback squared distance (2 * scope^2), strides. */
+ PixelData p;
+ p.sw = m__switch;
+ p.distance = rd * 2;
+ p.elem_stride = input->elem_stride;
+ p.row_stride = input->row_stride;
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ p.x = it.x;
+ p.y = it.y;
+ p.xmin = MAX2(p.x - m_scope, input_rect.xmin);
+ p.ymin = MAX2(p.y - m_scope, input_rect.ymin);
+ p.xmax = MIN2(p.x + m_scope, input_rect.xmax);
+ p.ymax = MIN2(p.y + m_scope, input_rect.ymax);
+ p.elem = it.in(0);
+ /* Negative when the element is above the switch value (distance to the nearest element
+ * below it), positive otherwise (distance to the nearest element above it). */
+ float pixel_value;
+ if (*p.elem > p.sw) {
+ pixel_value = -sqrtf(get_min_distance<std::less>(p));
+ }
+ else {
+ pixel_value = sqrtf(get_min_distance<std::greater>(p));
+ }
+ /* Map the signed distance to the output with a linear ramp of width `inset`. */
+ if (m_distance > 0.0f) {
+ const float delta = m_distance - pixel_value;
+ if (delta >= 0.0f) {
+ *it.out = delta >= inset ? 1.0f : delta / inset;
+ }
+ else {
+ *it.out = 0.0f;
+ }
+ }
+ else {
+ const float delta = -m_distance + pixel_value;
+ if (delta < 0.0f) {
+ *it.out = delta < -inset ? 1.0f : (-delta) / inset;
+ }
+ else {
+ *it.out = 0.0f;
+ }
+ }
+ }
+}
+
/* Dilate Distance. */
DilateDistanceOperation::DilateDistanceOperation()
{
@@ -170,15 +281,20 @@ DilateDistanceOperation::DilateDistanceOperation()
flags.complex = true;
flags.open_cl = true;
}
-void DilateDistanceOperation::initExecution()
+
+void DilateDistanceOperation::init_data()
{
- this->m_inputProgram = this->getInputSocketReader(0);
this->m_scope = this->m_distance;
if (this->m_scope < 3) {
this->m_scope = 3;
}
}
+void DilateDistanceOperation::initExecution()
+{ /* Only caches the reader of input socket 0; m_scope is computed in init_data(). */
+ this->m_inputProgram = this->getInputSocketReader(0);
+}
+
void *DilateDistanceOperation::initializeTileData(rcti * /*rect*/)
{
void *buffer = this->m_inputProgram->initializeTileData(nullptr);
@@ -258,6 +374,92 @@ void DilateDistanceOperation::executeOpenCL(OpenCLDevice *device,
device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this);
}
+void DilateDistanceOperation::get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area)
+{ /* The single input (index 0) is requested over the output area grown by `m_scope` per side. */
+ BLI_assert(input_idx == 0);
+ UNUSED_VARS_NDEBUG(input_idx);
+ r_input_area.xmin = output_area.xmin - m_scope;
+ r_input_area.xmax = output_area.xmax + m_scope;
+ r_input_area.ymin = output_area.ymin - m_scope;
+ r_input_area.ymax = output_area.ymax + m_scope;
+}
+
+struct DilateDistanceOperation::PixelData {
+ int x; /* Coordinates of the element being computed; set by update(). */
+ int y;
+ int xmin; /* Search window around (x, y), clamped to `input_rect`; set by update(). */
+ int xmax;
+ int ymin;
+ int ymax;
+ const float *elem; /* Input element at (x, y); set by update(). */
+ float min_distance; /* Squared `distance` given to the constructor. */
+ int scope; /* Half size of the search window before clamping. */
+ int elem_stride; /* Strides of the input buffer. */
+ int row_stride;
+ const rcti &input_rect; /* Reference to the input buffer's rect; `input` must outlive this object. */
+
+ PixelData(MemoryBuffer *input, const int distance, const int scope)
+ : min_distance(distance * distance),
+ scope(scope),
+ elem_stride(input->elem_stride),
+ row_stride(input->row_stride),
+ input_rect(input->get_rect())
+ { /* The per-element fields (x, y, window bounds, elem) stay uninitialized until update(). */
+ }
+
+ void update(BuffersIterator<float> &it)
+ { /* Refreshes the per-element fields from the iterator's current position. */
+ x = it.x;
+ y = it.y;
+ xmin = MAX2(x - scope, input_rect.xmin);
+ ymin = MAX2(y - scope, input_rect.ymin);
+ xmax = MIN2(x + scope, input_rect.xmax);
+ ymax = MIN2(y + scope, input_rect.ymax);
+ elem = it.in(0);
+ }
+};
+
+template<template<typename> typename TCompare>
+static float get_distance_value(DilateDistanceOperation::PixelData &p, const float start_value)
+{ /* Starting from `start_value`, folds in every window element whose squared distance to
+ * (p.x, p.y) is at most p.min_distance, keeping the element when `TCompare(element, value)`
+ * is true. The window is [p.xmin, p.xmax) x [p.ymin, p.ymax). */
+ /* TODO(manzanilla): bad performance, only loop elements within minimum distance removing
+ * coordinates and conditional if `dist <= min_dist`. May need to generate a table of offsets. */
+ const TCompare compare;
+ const float min_dist = p.min_distance;
+ float value = start_value;
+ const float *row = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride +
+ ((intptr_t)p.xmin - p.x) * p.elem_stride;
+ for (int yi = p.ymin; yi < p.ymax; yi++) {
+ const float dy = yi - p.y;
+ const float dist_y = dy * dy;
+ const float *elem = row;
+ for (int xi = p.xmin; xi < p.xmax; xi++) {
+ const float dx = xi - p.x;
+ const float dist = dx * dx + dist_y;
+ if (dist <= min_dist) {
+ value = compare(*elem, value) ? *elem : value;
+ }
+ elem += p.elem_stride;
+ }
+ row += p.row_stride;
+ }
+
+ return value;
+}
+
+void DilateDistanceOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame dilate: each output element is the largest input value within the distance
+ * (std::greater keeps the larger value), never less than the 0.0f start value. */
+ PixelData p(inputs[0], m_distance, m_scope);
+ for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+ p.update(it);
+ *it.out = get_distance_value<std::greater>(p, 0.0f);
+ }
+}
+
/* Erode Distance */
ErodeDistanceOperation::ErodeDistanceOperation() : DilateDistanceOperation()
{
@@ -318,6 +520,17 @@ void ErodeDistanceOperation::executeOpenCL(OpenCLDevice *device,
device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this);
}
+/** Full-frame erode: every output pixel takes the smallest input value found in its window. */
+void ErodeDistanceOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                          const rcti &area,
+                                                          Span<MemoryBuffer *> inputs)
+{
+  PixelData pixel(inputs[0], m_distance, m_scope);
+  BuffersIterator<float> it = output->iterate_with(inputs, area);
+  while (!it.is_end()) {
+    pixel.update(it);
+    /* Seeded with 1, so the result is never above one. */
+    *it.out = get_distance_value<std::less>(pixel, 1.0f);
+    ++it;
+  }
+}
+
/* Dilate step */
DilateStepOperation::DilateStepOperation()
{
@@ -475,6 +688,126 @@ bool DilateStepOperation::determineDependingAreaOfInterest(rcti *input,
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+/** The required input area is the output area grown by `m_iterations` on every side. */
+void DilateStepOperation::get_area_of_interest(const int input_idx,
+                                               const rcti &output_area,
+                                               rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  const auto margin = m_iterations;
+  /* Lower bounds move outwards... */
+  r_input_area.xmin = output_area.xmin - margin;
+  r_input_area.ymin = output_area.ymin - margin;
+  /* ...and so do the upper bounds. */
+  r_input_area.xmax = output_area.xmax + margin;
+  r_input_area.ymax = output_area.ymax + margin;
+}
+
+/**
+ * Full-frame separable dilate/erode of \a area: a horizontal pass followed by a vertical pass,
+ * both based on the van Herk/Gil-Werman algorithm.
+ *
+ * \param output: Buffer receiving the result for \a area.
+ * \param input: Buffer the source values are read from.
+ * \param area: Output area to compute.
+ * \param num_iterations: Half size of the window in pixels.
+ * \param compare_min_value: Value used to pad the working row/column where no input is
+ * available (the callers in this file pass `-FLT_MAX` with a max selector and `FLT_MAX` with a
+ * min selector).
+ */
+template<typename TCompareSelector>
+static void step_update_memory_buffer(MemoryBuffer *output,
+                                      const MemoryBuffer *input,
+                                      const rcti &area,
+                                      const int num_iterations,
+                                      const float compare_min_value)
+{
+  TCompareSelector selector;
+
+  const int width = output->getWidth();
+  const int height = output->getHeight();
+
+  const int half_window = num_iterations;
+  const int window = half_window * 2 + 1;
+
+  /* Area to read: the output area grown by the half window, clamped to the image. */
+  const int xmin = MAX2(0, area.xmin - half_window);
+  const int ymin = MAX2(0, area.ymin - half_window);
+  const int xmax = MIN2(width, area.xmax + half_window);
+  const int ymax = MIN2(height, area.ymax + half_window);
+
+  const int bwidth = area.xmax - area.xmin;
+  const int bheight = area.ymax - area.ymin;
+
+  /* NOTE: #result has area width, but new height.
+   * We have to calculate the additional rows in the first pass,
+   * to have valid data available for the second pass. */
+  rcti result_area;
+  BLI_rcti_init(&result_area, area.xmin, area.xmax, ymin, ymax);
+  MemoryBuffer result(DataType::Value, result_area);
+
+  /* #temp holds maxima for every step in the algorithm, #buf holds a
+   * single row or column of input values, padded with #limit values to
+   * simplify the logic. */
+  float *temp = (float *)MEM_mallocN(sizeof(float) * (2 * window - 1), "dilate erode temp");
+  float *buf = (float *)MEM_mallocN(sizeof(float) * (MAX2(bwidth, bheight) + 5 * half_window),
+                                    "dilate erode buf");
+
+  /* The following is based on the van Herk/Gil-Werman algorithm for morphology operations. */
+  /* First pass, horizontal dilate/erode. */
+  for (int y = ymin; y < ymax; y++) {
+    for (int x = 0; x < bwidth + 5 * half_window; x++) {
+      buf[x] = compare_min_value;
+    }
+    for (int x = xmin; x < xmax; x++) {
+      buf[x - area.xmin + window - 1] = input->get_value(x, y, 0);
+    }
+
+    for (int i = 0; i < (bwidth + 3 * half_window) / window; i++) {
+      int start = (i + 1) * window - 1;
+
+      temp[window - 1] = buf[start];
+      for (int x = 1; x < window; x++) {
+        temp[window - 1 - x] = selector(temp[window - x], buf[start - x]);
+        temp[window - 1 + x] = selector(temp[window + x - 2], buf[start + x]);
+      }
+
+      start = half_window + (i - 1) * window + 1;
+      for (int x = -MIN2(0, start); x < window - MAX2(0, start + window - bwidth); x++) {
+        result.get_value(start + x + area.xmin, y, 0) = selector(temp[x], temp[x + window - 1]);
+      }
+    }
+  }
+
+  /* Second pass, vertical dilate/erode. `x` is relative to the area here, so every access to
+   * #result needs the `area.xmin` offset. */
+  for (int x = 0; x < bwidth; x++) {
+    for (int y = 0; y < bheight + 5 * half_window; y++) {
+      buf[y] = compare_min_value;
+    }
+    for (int y = ymin; y < ymax; y++) {
+      buf[y - area.ymin + window - 1] = result.get_value(x + area.xmin, y, 0);
+    }
+
+    for (int i = 0; i < (bheight + 3 * half_window) / window; i++) {
+      int start = (i + 1) * window - 1;
+
+      temp[window - 1] = buf[start];
+      for (int y = 1; y < window; y++) {
+        temp[window - 1 - y] = selector(temp[window - y], buf[start - y]);
+        temp[window - 1 + y] = selector(temp[window + y - 2], buf[start + y]);
+      }
+
+      start = half_window + (i - 1) * window + 1;
+      for (int y = -MIN2(0, start); y < window - MAX2(0, start + window - bheight); y++) {
+        /* Write to the same column that was read above; using the bare relative `x` would
+         * address the wrong column whenever `area.xmin` is not zero. */
+        result.get_value(x + area.xmin, y + start + area.ymin, 0) = selector(
+            temp[y], temp[y + window - 1]);
+      }
+    }
+  }
+
+  MEM_freeN(temp);
+  MEM_freeN(buf);
+
+  output->copy_from(&result, area);
+}
+
+/** Binary selector returning the larger of its two arguments (used by the dilate step). */
+struct Max2Selector {
+  float operator()(const float a, const float b) const
+  {
+    return MAX2(a, b);
+  }
+};
+
+/** Full-frame dilate step: runs the shared step algorithm with a max selector. */
+void DilateStepOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                       const rcti &area,
+                                                       Span<MemoryBuffer *> inputs)
+{
+  /* Padding with the lowest float keeps padded elements from winning a max. */
+  const float padding = -FLT_MAX;
+  const MemoryBuffer *input = inputs[0];
+  step_update_memory_buffer<Max2Selector>(output, input, area, m_iterations, padding);
+}
+
/* Erode step */
ErodeStepOperation::ErodeStepOperation() : DilateStepOperation()
{
@@ -571,4 +904,18 @@ void *ErodeStepOperation::initializeTileData(rcti *rect)
return result;
}
+/** Binary selector returning the smaller of its two arguments (used by the erode step). */
+struct Min2Selector {
+  float operator()(const float a, const float b) const
+  {
+    return MIN2(a, b);
+  }
+};
+
+/** Full-frame erode step: runs the shared step algorithm with a min selector. */
+void ErodeStepOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                      const rcti &area,
+                                                      Span<MemoryBuffer *> inputs)
+{
+  /* Padding with the highest float keeps padded elements from winning a min. */
+  const float padding = FLT_MAX;
+  const MemoryBuffer *input = inputs[0];
+  step_update_memory_buffer<Min2Selector>(output, input, area, m_iterations, padding);
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.h b/source/blender/compositor/operations/COM_DilateErodeOperation.h
index a489e293e8e..9c32a5ac1fd 100644
--- a/source/blender/compositor/operations/COM_DilateErodeOperation.h
+++ b/source/blender/compositor/operations/COM_DilateErodeOperation.h
@@ -18,11 +18,14 @@
#pragma once
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
namespace blender::compositor {
-class DilateErodeThresholdOperation : public NodeOperation {
+class DilateErodeThresholdOperation : public MultiThreadedOperation {
+ public:
+ struct PixelData;
+
private:
/**
* Cached reference to the inputProgram
@@ -47,6 +50,7 @@ class DilateErodeThresholdOperation : public NodeOperation {
*/
void executePixel(float output[4], int x, int y, void *data) override;
+ void init_data() override;
/**
* Initialize the execution
*/
@@ -74,10 +78,17 @@ class DilateErodeThresholdOperation : public NodeOperation {
bool determineDependingAreaOfInterest(rcti *input,
ReadBufferOperation *readOperation,
rcti *output) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
-class DilateDistanceOperation : public NodeOperation {
- private:
+class DilateDistanceOperation : public MultiThreadedOperation {
+ public:
+ struct PixelData;
+
protected:
/**
* Cached reference to the inputProgram
@@ -94,6 +105,7 @@ class DilateDistanceOperation : public NodeOperation {
*/
void executePixel(float output[4], int x, int y, void *data) override;
+ void init_data() override;
/**
* Initialize the execution
*/
@@ -119,7 +131,13 @@ class DilateDistanceOperation : public NodeOperation {
MemoryBuffer **inputMemoryBuffers,
std::list<cl_mem> *clMemToCleanUp,
std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+ virtual void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
+
class ErodeDistanceOperation : public DilateDistanceOperation {
public:
ErodeDistanceOperation();
@@ -135,9 +153,13 @@ class ErodeDistanceOperation : public DilateDistanceOperation {
MemoryBuffer **inputMemoryBuffers,
std::list<cl_mem> *clMemToCleanUp,
std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
-class DilateStepOperation : public NodeOperation {
+class DilateStepOperation : public MultiThreadedOperation {
protected:
/**
* Cached reference to the inputProgram
@@ -174,6 +196,11 @@ class DilateStepOperation : public NodeOperation {
bool determineDependingAreaOfInterest(rcti *input,
ReadBufferOperation *readOperation,
rcti *output) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+ virtual void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
class ErodeStepOperation : public DilateStepOperation {
@@ -181,6 +208,9 @@ class ErodeStepOperation : public DilateStepOperation {
ErodeStepOperation();
void *initializeTileData(rcti *rect) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
index 97bdc25af3b..102025ed915 100644
--- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
+++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
@@ -146,4 +146,58 @@ bool DirectionalBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+/** The whole operation canvas is requested as input, regardless of the output area. */
+void DirectionalBlurOperation::get_area_of_interest(const int input_idx,
+                                                    const rcti &UNUSED(output_area),
+                                                    rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  /* Horizontal extent. */
+  r_input_area.xmin = 0;
+  r_input_area.xmax = getWidth();
+  /* Vertical extent. */
+  r_input_area.ymin = 0;
+  r_input_area.ymax = getHeight();
+}
+
+/**
+ * Full-frame directional blur: each output pixel is the average of the input at the pixel
+ * itself plus one bilinear sample per iteration, where every iteration applies one more step
+ * of translation, rotation and scale around the center.
+ */
+void DirectionalBlurOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                            const rcti &area,
+                                                            Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *input = inputs[0];
+  const int iterations = pow(2.0f, m_data->iter);
+  /* The accumulator holds the untransformed sample plus one sample per iteration. */
+  const float inv_num_samples = 1.0f / (iterations + 1);
+  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
+    const int x = it.x;
+    const int y = it.y;
+    float color_accum[4];
+    input->read_elem_bilinear(x, y, color_accum);
+
+    /* Blur pixel. */
+    /* TODO(manzanilla): Many values used on iterations can be calculated beforehand. Create a
+     * table on operation initialization. */
+    float translate_x = m_tx;
+    float translate_y = m_ty;
+    float scale = m_sc;
+    float rotation = m_rot;
+    for (int step = 0; step < iterations; step++) {
+      const float cs = cosf(rotation);
+      const float ss = sinf(rotation);
+      const float inv_scale = 1.0f / (1.0f + scale);
+
+      /* Scale around the center, then translate. */
+      const float u = inv_scale * (x - m_center_x_pix) + translate_x;
+      const float v = inv_scale * (y - m_center_y_pix) + translate_y;
+
+      /* Rotate and move back to image space before sampling. */
+      float sampled[4];
+      input->read_elem_bilinear(
+          cs * u + ss * v + m_center_x_pix, cs * v - ss * u + m_center_y_pix, sampled);
+      add_v4_v4(color_accum, sampled);
+
+      /* Double transformations. */
+      translate_x += m_tx;
+      translate_y += m_ty;
+      rotation += m_rot;
+      scale += m_sc;
+    }
+
+    mul_v4_v4fl(it.out, color_accum, inv_num_samples);
+  }
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
index 5555520462b..9a982bf6481 100644
--- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
+++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
@@ -18,12 +18,12 @@
#pragma once
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
#include "COM_QualityStepHelper.h"
namespace blender::compositor {
-class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper {
+class DirectionalBlurOperation : public MultiThreadedOperation, public QualityStepHelper {
private:
SocketReader *m_inputProgram;
NodeDBlurData *m_data;
@@ -65,6 +65,11 @@ class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper
MemoryBuffer **inputMemoryBuffers,
std::list<cl_mem> *clMemToCleanUp,
std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_InpaintOperation.cc b/source/blender/compositor/operations/COM_InpaintOperation.cc
index bfcd504177f..5e76c41752c 100644
--- a/source/blender/compositor/operations/COM_InpaintOperation.cc
+++ b/source/blender/compositor/operations/COM_InpaintOperation.cc
@@ -39,6 +39,7 @@ InpaintSimpleOperation::InpaintSimpleOperation()
this->m_manhattan_distance = nullptr;
this->m_cached_buffer = nullptr;
this->m_cached_buffer_ready = false;
+ flags.is_fullframe_operation = true;
}
void InpaintSimpleOperation::initExecution()
{
@@ -286,4 +287,47 @@ bool InpaintSimpleOperation::determineDependingAreaOfInterest(rcti * /*input*/,
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+/** The whole operation canvas is requested as input, regardless of the output area. */
+void InpaintSimpleOperation::get_area_of_interest(const int input_idx,
+                                                  const rcti &UNUSED(output_area),
+                                                  rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  /* Horizontal extent. */
+  r_input_area.xmin = 0;
+  r_input_area.xmax = getWidth();
+  /* Vertical extent. */
+  r_input_area.ymin = 0;
+  r_input_area.ymax = getHeight();
+}
+
+/**
+ * Full-frame inpaint. The first call copies the input into `m_cached_buffer` and runs the
+ * inpainting steps on that copy; every call then copies \a area of the cached result into
+ * \a output.
+ */
+void InpaintSimpleOperation::update_memory_buffer(MemoryBuffer *output,
+                                                  const rcti &area,
+                                                  Span<MemoryBuffer *> inputs)
+{
+  /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. */
+  MemoryBuffer *input = inputs[0];
+  if (!m_cached_buffer_ready) {
+    /* Take a private copy of the input pixels to work on. */
+    if (!input->is_a_single_elem()) {
+      m_cached_buffer = (float *)MEM_dupallocN(input->getBuffer());
+    }
+    else {
+      /* A single element buffer is inflated first and its pixels are taken over. */
+      MemoryBuffer *inflated = input->inflate();
+      m_cached_buffer = inflated->release_ownership_buffer();
+      delete inflated;
+    }
+
+    calc_manhattan_distance();
+
+    int x, y;
+    for (int cursor = 0; next_pixel(x, y, cursor, m_iterations);) {
+      pix_step(x, y);
+    }
+    m_cached_buffer_ready = true;
+  }
+
+  const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType());
+  MemoryBuffer cached(m_cached_buffer, num_channels, input->getWidth(), input->getHeight());
+  output->copy_from(&cached, area);
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_InpaintOperation.h b/source/blender/compositor/operations/COM_InpaintOperation.h
index e3d27bf7704..e11610bd263 100644
--- a/source/blender/compositor/operations/COM_InpaintOperation.h
+++ b/source/blender/compositor/operations/COM_InpaintOperation.h
@@ -66,6 +66,13 @@ class InpaintSimpleOperation : public NodeOperation {
ReadBufferOperation *readOperation,
rcti *output) override;
+ void get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area) override;
+ void update_memory_buffer(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
+
private:
void calc_manhattan_distance();
void clamp_xy(int &x, int &y);
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc
index b078d85372d..4153b9c8523 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.cc
+++ b/source/blender/compositor/operations/COM_SMAAOperation.cc
@@ -61,6 +61,8 @@ namespace blender::compositor {
/*-----------------------------------------------------------------------------*/
/* Internal Functions to Sample Pixel Color from Image */
+/* TODO(manzanilla): to be removed with tiled implementation. Replace it with
+ * #buffer->read_elem_checked. */
static inline void sample(SocketReader *reader, int x, int y, float color[4])
{
if (x < 0 || x >= reader->getWidth() || y < 0 || y >= reader->getHeight()) {
@@ -71,8 +73,13 @@ static inline void sample(SocketReader *reader, int x, int y, float color[4])
reader->read(color, x, y, nullptr);
}
-static void sample_bilinear_vertical(
- SocketReader *reader, int x, int y, float yoffset, float color[4])
+/* Overload of `sample` for full-frame buffers: forwards to #MemoryBuffer::read_elem_checked.
+ * NOTE(review): presumably that call deals with out-of-bounds coordinates itself, like the
+ * #SocketReader overload does explicitly - confirm. */
+static inline void sample(MemoryBuffer *reader, int x, int y, float color[4])
+{
+ reader->read_elem_checked(x, y, color);
+}
+
+template<typename T>
+static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4])
{
float iy = floorf(yoffset);
float fy = yoffset - iy;
@@ -89,8 +96,8 @@ static void sample_bilinear_vertical(
color[3] = interpf(color01[3], color00[3], fy);
}
-static void sample_bilinear_horizontal(
- SocketReader *reader, int x, int y, float xoffset, float color[4])
+template<typename T>
+static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4])
{
float ix = floorf(xoffset);
float fx = xoffset - ix;
@@ -162,7 +169,7 @@ static void area_diag(int d1, int d2, int e1, int e2, float weights[2])
SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation()
{
this->addInputSocket(DataType::Color); /* image */
- this->addInputSocket(DataType::Value); /* depth, material ID, etc. */
+ this->addInputSocket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. */
this->addOutputSocket(DataType::Color);
this->flags.complex = true;
this->m_imageReader = nullptr;
@@ -207,6 +214,16 @@ bool SMAAEdgeDetectionOperation::determineDependingAreaOfInterest(
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+/** Every input needs the output area grown by 2 pixels towards min and 1 pixel towards max. */
+void SMAAEdgeDetectionOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                                      const rcti &output_area,
+                                                      rcti &r_input_area)
+{
+  /* Reads reach up to two pixels left/up... */
+  r_input_area.xmin = output_area.xmin - 2;
+  r_input_area.ymin = output_area.ymin - 2;
+  /* ...and one pixel right/down. */
+  r_input_area.xmax = output_area.xmax + 1;
+  r_input_area.ymax = output_area.ymax + 1;
+}
+
void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, void * /*data*/)
{
float color[4];
@@ -288,6 +305,94 @@ void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, voi
}
}
+/**
+ * Full-frame luma edge detection. For each pixel of \a area, `out[0]` flags a left edge and
+ * `out[1]` a top edge (1.0 or 0.0); `out[2]` is always 0.0 and `out[3]` always 1.0.
+ */
+void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                              const rcti &area,
+                                                              Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *image = inputs[0];
+  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
+    float color[4];
+    const int x = it.x;
+    const int y = it.y;
+    /* Luminance of the image at the given coordinates. */
+    const auto luma_at = [&](const int px, const int py) -> float {
+      image->read_elem_checked(px, py, color);
+      return IMB_colormanagement_get_luminance(color);
+    };
+
+    /* Luma of the pixel and of its left and top neighbors, with the resulting deltas. */
+    const float luma = luma_at(x, y);
+    const float luma_left = luma_at(x - 1, y);
+    const float luma_top = luma_at(x, y - 1);
+    const float delta_left = fabsf(luma - luma_left);
+    const float delta_top = fabsf(luma - luma_top);
+
+    /* Usual threshold; the first column/row can not have a left/top edge. */
+    float *edges = it.out;
+    edges[0] = (x > 0 && delta_left >= m_threshold) ? 1.0f : 0.0f;
+    edges[1] = (y > 0 && delta_top >= m_threshold) ? 1.0f : 0.0f;
+    edges[2] = 0.0f;
+    edges[3] = 1.0f;
+
+    /* Nothing more to do when neither edge was detected. */
+    if (is_zero_v2(edges)) {
+      continue;
+    }
+
+    /* Deltas towards the right and bottom neighbors. */
+    const float luma_right = luma_at(x + 1, y);
+    const float luma_bottom = luma_at(x, y + 1);
+    const float delta_right = fabsf(luma - luma_right);
+    const float delta_bottom = fabsf(luma - luma_bottom);
+
+    /* Maximum delta in the direct neighborhood. */
+    float max_delta = fmaxf(fmaxf(delta_left, delta_right), fmaxf(delta_top, delta_bottom));
+
+    /* This luma is shared by the left and the top edge checks. */
+    const float luma_left_top = luma_at(x - 1, y - 1);
+
+    /* Left edge */
+    if (edges[0] != 0.0f) {
+      /* Deltas around the left pixel. */
+      const float luma_left_left = luma_at(x - 2, y);
+      const float luma_left_bottom = luma_at(x - 1, y + 1);
+      const float delta_left_left = fabsf(luma_left - luma_left_left);
+      const float delta_left_top = fabsf(luma_left - luma_left_top);
+      const float delta_left_bottom = fabsf(luma_left - luma_left_bottom);
+
+      /* Final maximum delta. */
+      max_delta = fmaxf(max_delta,
+                        fmaxf(delta_left_left, fmaxf(delta_left_top, delta_left_bottom)));
+
+      /* Local contrast adaptation: drop the edge when a neighbor delta dominates it. */
+      if (max_delta > m_contrast_limit * delta_left) {
+        edges[0] = 0.0f;
+      }
+    }
+
+    /* Top edge */
+    if (edges[1] != 0.0f) {
+      /* Deltas around the top pixel. */
+      const float luma_top_top = luma_at(x, y - 2);
+      const float luma_top_right = luma_at(x + 1, y - 1);
+      const float delta_top_top = fabsf(luma_top - luma_top_top);
+      const float delta_top_left = fabsf(luma_top - luma_left_top);
+      const float delta_top_right = fabsf(luma_top - luma_top_right);
+
+      /* Final maximum delta. */
+      max_delta = fmaxf(max_delta,
+                        fmaxf(delta_top_top, fmaxf(delta_top_left, delta_top_right)));
+
+      /* Local contrast adaptation: drop the edge when a neighbor delta dominates it. */
+      if (max_delta > m_contrast_limit * delta_top) {
+        edges[1] = 0.0f;
+      }
+    }
+  }
+}
+
/*-----------------------------------------------------------------------------*/
/* Blending Weight Calculation (Second Pass) */
/*-----------------------------------------------------------------------------*/
@@ -309,6 +414,9 @@ void *SMAABlendingWeightCalculationOperation::initializeTileData(rcti *rect)
void SMAABlendingWeightCalculationOperation::initExecution()
{
this->m_imageReader = this->getInputSocketReader(0);
+ /* In tiled execution the edges image is sampled through the socket reader. The lambda only
+ * needs `this`, so it is captured explicitly (implicit `this` capture through `[=]` is
+ * deprecated in C++20). */
+ if (execution_model_ == eExecutionModel::Tiled) {
+ sample_image_fn_ = [this](int x, int y, float *out) { sample(m_imageReader, x, y, out); };
+ }
}
void SMAABlendingWeightCalculationOperation::setCornerRounding(float rounding)
@@ -414,6 +522,113 @@ void SMAABlendingWeightCalculationOperation::executePixel(float output[4],
}
}
+/** Points `sample_image_fn_` at the first input buffer before the partial updates run. */
+void SMAABlendingWeightCalculationOperation::update_memory_buffer_started(
+    MemoryBuffer *UNUSED(output), const rcti &UNUSED(out_area), Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *edges_image = inputs[0];
+  /* Only the buffer pointer is needed by the sampler, so it is captured by value. */
+  sample_image_fn_ = [edges_image](int x, int y, float *out) {
+    edges_image->read_elem_checked(x, y, out);
+  };
+}
+
+/**
+ * Full-frame blending weight calculation. The edges image is read through `sample_image_fn_`;
+ * weights for a north edge are written to `out[0..1]` and weights for a west edge to
+ * `out[2..3]`.
+ */
+void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &out_area, Span<MemoryBuffer *> UNUSED(inputs))
+{
+  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
+    const int x = it.x;
+    const int y = it.y;
+    zero_v4(it.out);
+
+    float edges[4];
+    sample_image_fn_(x, y, edges);
+
+    /* True when the given channel of the edges image is set at the given coordinates. */
+    float c[4];
+    const auto edge_set = [&](const int px, const int py, const int channel) -> bool {
+      sample_image_fn_(px, py, c);
+      return c[channel] > 0.0;
+    };
+
+    /* Edge at north */
+    if (edges[1] > 0.0f) {
+      /* Diagonals have both north and west edges, so calculating weights for them
+       * in one of the boundaries is enough. */
+      calculateDiagWeights(x, y, edges, it.out);
+
+      /* Diagonals have priority: when one was found, horizontal/vertical processing is
+       * skipped for this pixel. */
+      if (!is_zero_v2(it.out)) {
+        continue;
+      }
+
+      /* Distance to the left and right ends of the line. */
+      int left = searchXLeft(x, y);
+      int right = searchXRight(x, y);
+      int d1 = x - left;
+      int d2 = right - x;
+
+      /* Crossing edges at the left end (e1) and the right end (e2). */
+      int e1 = 0;
+      int e2 = 0;
+      e1 += edge_set(left, y - 1, 0) ? 1 : 0;
+      e1 += edge_set(left, y, 0) ? 2 : 0;
+      e2 += edge_set(right + 1, y - 1, 0) ? 1 : 0;
+      e2 += edge_set(right + 1, y, 0) ? 2 : 0;
+
+      /* The pattern is known now, get the actual area. */
+      area(d1, d2, e1, e2, it.out); /* R, G */
+
+      /* Fix corners: */
+      if (m_corner_rounding) {
+        detectHorizontalCornerPattern(it.out, left, right, y, d1, d2);
+      }
+    }
+
+    /* Edge at west */
+    if (edges[0] > 0.0f) {
+      /* Did we already do diagonal search for this west edge from the left neighboring pixel? */
+      if (isVerticalSearchUnneeded(x, y)) {
+        continue;
+      }
+
+      /* Distance to the top and bottom ends of the line. */
+      int top = searchYUp(x, y);
+      int bottom = searchYDown(x, y);
+      int d1 = y - top;
+      int d2 = bottom - y;
+
+      /* Crossing edges at the top end (e1) and the bottom end (e2). */
+      int e1 = 0;
+      int e2 = 0;
+      e1 += edge_set(x - 1, top, 1) ? 1 : 0;
+      e1 += edge_set(x, top, 1) ? 2 : 0;
+      e2 += edge_set(x - 1, bottom + 1, 1) ? 1 : 0;
+      e2 += edge_set(x, bottom + 1, 1) ? 2 : 0;
+
+      /* Get the area for this direction: */
+      area(d1, d2, e1, e2, it.out + 2); /* B, A */
+
+      /* Fix corners: */
+      if (m_corner_rounding) {
+        detectVerticalCornerPattern(it.out + 2, x, top, bottom, d1, d2);
+      }
+    }
+  }
+}
+
void SMAABlendingWeightCalculationOperation::deinitExecution()
{
this->m_imageReader = nullptr;
@@ -434,6 +649,19 @@ bool SMAABlendingWeightCalculationOperation::determineDependingAreaOfInterest(
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+/**
+ * The input area is the output area grown per side by the larger of the straight and the
+ * diagonal search reach.
+ */
+void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                                                  const rcti &output_area,
+                                                                  rcti &r_input_area)
+{
+  /* Amount to grow each side by. */
+  const double grow_xmax = fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  const double grow_xmin = fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1),
+                                SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  const double grow_ymax = fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
+  const double grow_ymin = fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG);
+
+  r_input_area.xmax = output_area.xmax + grow_xmax;
+  r_input_area.xmin = output_area.xmin - grow_xmin;
+  r_input_area.ymax = output_area.ymax + grow_ymax;
+  r_input_area.ymin = output_area.ymin - grow_ymin;
+}
+
/*-----------------------------------------------------------------------------*/
/* Diagonal Search Functions */
@@ -449,7 +677,7 @@ int SMAABlendingWeightCalculationOperation::searchDiag1(int x, int y, int dir, b
while (x != end) {
x += dir;
y -= dir;
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[1] == 0.0f) {
*found = true;
break;
@@ -472,12 +700,12 @@ int SMAABlendingWeightCalculationOperation::searchDiag2(int x, int y, int dir, b
while (x != end) {
x += dir;
y += dir;
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[1] == 0.0f) {
*found = true;
break;
}
- sample(m_imageReader, x + 1, y, e);
+ sample_image_fn_(x + 1, y, e);
if (e[0] == 0.0f) {
*found = true;
return (dir > 0) ? x : x - dir;
@@ -522,11 +750,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
/* Fetch the crossing edges: */
int left = x - d1, bottom = y + d1;
- sample(m_imageReader, left - 1, bottom, c);
+ sample_image_fn_(left - 1, bottom, c);
if (c[1] > 0.0) {
e1 += 2;
}
- sample(m_imageReader, left, bottom, c);
+ sample_image_fn_(left, bottom, c);
if (c[0] > 0.0) {
e1 += 1;
}
@@ -536,11 +764,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
/* Fetch the crossing edges: */
int right = x + d2, top = y - d2;
- sample(m_imageReader, right + 1, top, c);
+ sample_image_fn_(right + 1, top, c);
if (c[1] > 0.0) {
e2 += 2;
}
- sample(m_imageReader, right + 1, top - 1, c);
+ sample_image_fn_(right + 1, top - 1, c);
if (c[0] > 0.0) {
e2 += 1;
}
@@ -552,7 +780,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
/* Search for the line ends: */
d1 = x - searchDiag2(x, y, -1, &d1_found);
- sample(m_imageReader, x + 1, y, e);
+ sample_image_fn_(x + 1, y, e);
if (e[0] > 0.0f) {
d2 = searchDiag2(x, y, 1, &d2_found) - x;
}
@@ -568,11 +796,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
/* Fetch the crossing edges: */
int left = x - d1, top = y - d1;
- sample(m_imageReader, left - 1, top, c);
+ sample_image_fn_(left - 1, top, c);
if (c[1] > 0.0) {
e1 += 2;
}
- sample(m_imageReader, left, top - 1, c);
+ sample_image_fn_(left, top - 1, c);
if (c[0] > 0.0) {
e1 += 1;
}
@@ -582,7 +810,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
/* Fetch the crossing edges: */
int right = x + d2, bottom = y + d2;
- sample(m_imageReader, right + 1, bottom, c);
+ sample_image_fn_(right + 1, bottom, c);
if (c[1] > 0.0) {
e2 += 2;
}
@@ -610,7 +838,7 @@ bool SMAABlendingWeightCalculationOperation::isVerticalSearchUnneeded(int x, int
}
/* Search for the line ends: */
- sample(m_imageReader, x - 1, y, e);
+ sample_image_fn_(x - 1, y, e);
if (e[1] > 0.0f) {
d1 = x - searchDiag2(x - 1, y, -1, &found);
}
@@ -631,14 +859,14 @@ int SMAABlendingWeightCalculationOperation::searchXLeft(int x, int y)
float e[4];
while (x > end) {
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[1] == 0.0f) { /* Is the edge not activated? */
break;
}
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
return x;
}
- sample(m_imageReader, x, y - 1, e);
+ sample_image_fn_(x, y - 1, e);
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
return x;
}
@@ -655,12 +883,12 @@ int SMAABlendingWeightCalculationOperation::searchXRight(int x, int y)
while (x < end) {
x++;
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[1] == 0.0f || /* Is the edge not activated? */
e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
break;
}
- sample(m_imageReader, x, y - 1, e);
+ sample_image_fn_(x, y - 1, e);
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
break;
}
@@ -675,14 +903,14 @@ int SMAABlendingWeightCalculationOperation::searchYUp(int x, int y)
float e[4];
while (y > end) {
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[0] == 0.0f) { /* Is the edge not activated? */
break;
}
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
return y;
}
- sample(m_imageReader, x - 1, y, e);
+ sample_image_fn_(x - 1, y, e);
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
return y;
}
@@ -699,12 +927,12 @@ int SMAABlendingWeightCalculationOperation::searchYDown(int x, int y)
while (y < end) {
y++;
- sample(m_imageReader, x, y, e);
+ sample_image_fn_(x, y, e);
if (e[0] == 0.0f || /* Is the edge not activated? */
e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
break;
}
- sample(m_imageReader, x - 1, y, e);
+ sample_image_fn_(x - 1, y, e);
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
break;
}
@@ -728,16 +956,16 @@ void SMAABlendingWeightCalculationOperation::detectHorizontalCornerPattern(
/* Near the left corner */
if (d1 <= d2) {
- sample(m_imageReader, left, y + 1, e);
+ sample_image_fn_(left, y + 1, e);
factor[0] -= rounding * e[0];
- sample(m_imageReader, left, y - 2, e);
+ sample_image_fn_(left, y - 2, e);
factor[1] -= rounding * e[0];
}
/* Near the right corner */
if (d1 >= d2) {
- sample(m_imageReader, right + 1, y + 1, e);
+ sample_image_fn_(right + 1, y + 1, e);
factor[0] -= rounding * e[0];
- sample(m_imageReader, right + 1, y - 2, e);
+ sample_image_fn_(right + 1, y - 2, e);
factor[1] -= rounding * e[0];
}
@@ -757,16 +985,16 @@ void SMAABlendingWeightCalculationOperation::detectVerticalCornerPattern(
/* Near the top corner */
if (d1 <= d2) {
- sample(m_imageReader, x + 1, top, e);
+ sample_image_fn_(x + 1, top, e);
factor[0] -= rounding * e[1];
- sample(m_imageReader, x - 2, top, e);
+ sample_image_fn_(x - 2, top, e);
factor[1] -= rounding * e[1];
}
/* Near the bottom corner */
if (d1 >= d2) {
- sample(m_imageReader, x + 1, bottom + 1, e);
+ sample_image_fn_(x + 1, bottom + 1, e);
factor[0] -= rounding * e[1];
- sample(m_imageReader, x - 2, bottom + 1, e);
+ sample_image_fn_(x - 2, bottom + 1, e);
factor[1] -= rounding * e[1];
}
@@ -847,6 +1075,59 @@ void SMAANeighborhoodBlendingOperation::executePixel(float output[4],
madd_v4_v4fl(output, color2, weight2);
}
+void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &out_area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame implementation of the neighborhood blending pass: for every pixel of `out_area`
+   * it writes into `output` either the unmodified color of the first input or a weighted mix of
+   * two bilinear samples of that input, driven by the weights read from the second input. */
+ MemoryBuffer *image1 = inputs[0]; /* Color source: plain copy and bilinear samples below. */
+ MemoryBuffer *image2 = inputs[1]; /* Source of the per-pixel blending weights. */
+ for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
+ const float x = it.x; /* NOTE(review): coordinates are held as floats here while `sample_fn`
+                        * takes ints, so `x + 1` / `y + 1` below are float expressions. Which
+                        * `read_elem_checked` overload that selects is not visible - confirm. */
+ const float y = it.y;
+ float w[4];
+
+ /* Fetch the blending weights for current pixel: `top` and `left` are channels 0 and 2 of this
+  * pixel, `right` is channel 3 of the pixel at x + 1 and `bottom` is channel 1 of the pixel at
+  * y + 1. Presumably the "checked" read tolerates coordinates outside the buffer (the +1
+  * neighbors can leave `out_area`) - confirm in MemoryBuffer. */
+ image2->read_elem_checked(x, y, w);
+ const float left = w[2], top = w[0];
+ image2->read_elem_checked(x + 1, y, w);
+ const float right = w[3];
+ image2->read_elem_checked(x, y + 1, w);
+ const float bottom = w[1];
+
+ /* Is there any blending weight with a value greater than 0.0?
+  * If the four weights sum to less than 1e-5 the pixel is left untouched: the color of `image1`
+  * is copied straight to the output and the loop moves on to the next pixel. */
+ if (right + bottom + left + top < 1e-5f) {
+ image1->read_elem_checked(x, y, it.out);
+ continue;
+ }
+
+ /* Calculate the blending offsets:
+  * the axis holding the largest single weight selects the sampler. The two offsets point in
+  * opposite directions along that axis and the two weights are normalized by their sum, so
+  * `weight1 + weight2` is one. */
+ void (*sample_fn)(MemoryBuffer * reader, int x, int y, float xoffset, float color[4]);
+ float offset1, offset2, weight1, weight2, color1[4], color2[4];
+
+ if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */
+ sample_fn = sample_bilinear_horizontal;
+ offset1 = right;
+ offset2 = -left;
+ weight1 = right / (right + left);
+ weight2 = left / (right + left);
+ }
+ else {
+ sample_fn = sample_bilinear_vertical;
+ offset1 = bottom;
+ offset2 = -top;
+ weight1 = bottom / (bottom + top);
+ weight2 = top / (bottom + top);
+ }
+
+ /* We exploit bilinear filtering to mix current pixel with the chosen neighbor:
+  * both samples are taken from the color input at the current pixel, displaced by the
+  * fractional offsets computed above. */
+ sample_fn(image1, x, y, offset1, color1);
+ sample_fn(image1, x, y, offset2, color2);
+
+ mul_v4_v4fl(it.out, color1, weight1); /* out = color1 * weight1 */
+ madd_v4_v4fl(it.out, color2, weight2); /* out += color2 * weight2 */
+ }
+}
+
void SMAANeighborhoodBlendingOperation::deinitExecution()
{
this->m_image1Reader = nullptr;
@@ -866,4 +1147,12 @@ bool SMAANeighborhoodBlendingOperation::determineDependingAreaOfInterest(
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
+void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int UNUSED(input_idx),
+ const rcti &output_area,
+ rcti &r_input_area)
+{ /* Reports through `r_input_area` the input region needed to compute `output_area`. The input
+   * index is ignored, so every input gets the same answer: the output rectangle itself, then
+   * adjusted by `expand_area_for_sampler` for bilinear sampling. */
+ r_input_area = output_area;
+ expand_area_for_sampler(r_input_area, PixelSampler::Bilinear);
+}
+
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h
index 781762202b4..91b9299ee43 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.h
+++ b/source/blender/compositor/operations/COM_SMAAOperation.h
@@ -20,14 +20,14 @@
#pragma once
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
namespace blender::compositor {
/*-----------------------------------------------------------------------------*/
/* Edge Detection (First Pass) */
-class SMAAEdgeDetectionOperation : public NodeOperation {
+class SMAAEdgeDetectionOperation : public MultiThreadedOperation {
protected:
SocketReader *m_imageReader;
SocketReader *m_valueReader;
@@ -60,15 +60,20 @@ class SMAAEdgeDetectionOperation : public NodeOperation {
bool determineDependingAreaOfInterest(rcti *input,
ReadBufferOperation *readOperation,
rcti *output) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
/*-----------------------------------------------------------------------------*/
/* Blending Weight Calculation (Second Pass) */
-class SMAABlendingWeightCalculationOperation : public NodeOperation {
+class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation {
private:
SocketReader *m_imageReader;
-
+ std::function<void(int x, int y, float *out)> sample_image_fn_;
int m_corner_rounding;
public:
@@ -96,6 +101,14 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation {
ReadBufferOperation *readOperation,
rcti *output) override;
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_started(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
+
private:
/* Diagonal Search Functions */
int searchDiag1(int x, int y, int dir, bool *found);
@@ -117,7 +130,7 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation {
/*-----------------------------------------------------------------------------*/
/* Neighborhood Blending (Third Pass) */
-class SMAANeighborhoodBlendingOperation : public NodeOperation {
+class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation {
private:
SocketReader *m_image1Reader;
SocketReader *m_image2Reader;
@@ -144,6 +157,11 @@ class SMAANeighborhoodBlendingOperation : public NodeOperation {
bool determineDependingAreaOfInterest(rcti *input,
ReadBufferOperation *readOperation,
rcti *output) override;
+
+ void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+ void update_memory_buffer_partial(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
};
} // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.cc b/source/blender/compositor/operations/COM_VectorBlurOperation.cc
index df65044afc1..5405e6d424a 100644
--- a/source/blender/compositor/operations/COM_VectorBlurOperation.cc
+++ b/source/blender/compositor/operations/COM_VectorBlurOperation.cc
@@ -57,6 +57,7 @@ VectorBlurOperation::VectorBlurOperation()
this->m_inputSpeedProgram = nullptr;
this->m_inputZProgram = nullptr;
flags.complex = true;
+ flags.is_fullframe_operation = true;
}
void VectorBlurOperation::initExecution()
{
@@ -121,6 +122,51 @@ bool VectorBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/,
return false;
}
+void VectorBlurOperation::get_area_of_interest(const int UNUSED(input_idx),
+ const rcti &UNUSED(output_area),
+ rcti &r_input_area)
+{ /* Reports the input region needed by this operation. Both the input index and the requested
+   * output area are ignored: the result is always the rectangle from (0, 0) to this operation's
+   * width and height. */
+ r_input_area.xmin = 0;
+ r_input_area.xmax = this->getWidth();
+ r_input_area.ymin = 0;
+ r_input_area.ymax = this->getHeight();
+}
+
+void VectorBlurOperation::update_memory_buffer(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs)
+{ /* Full-frame entry point: the blurred result is generated once into `m_cachedInstance`, and
+   * on every call the requested `area` is copied from that cache into `output`. */
+ /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. */
+ if (!m_cachedInstance) { /* Only generate when no cached result exists yet. */
+ MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+ const bool is_image_inflated = image->is_a_single_elem(); /* Remembered so that only a buffer
+                                                            * created here by `inflate()` is
+                                                            * deleted below, never the input. */
+ image = is_image_inflated ? image->inflate() : image;
+
+ /* Must be a copy because it's modified in #generateVectorBlur.
+  * Either branch yields a buffer created here (`inflate()` or copy construction), which is why
+  * `speed` is deleted unconditionally further down. */
+ MemoryBuffer *speed = inputs[SPEED_INPUT_INDEX];
+ speed = speed->is_a_single_elem() ? speed->inflate() : new MemoryBuffer(*speed);
+
+ MemoryBuffer *z = inputs[Z_INPUT_INDEX];
+ const bool is_z_inflated = z->is_a_single_elem(); /* Same ownership tracking as for `image`. */
+ z = is_z_inflated ? z->inflate() : z;
+
+ m_cachedInstance = (float *)MEM_dupallocN(image->getBuffer()); /* Start from a duplicate of the
+                                                                 * image data; it is handed to
+                                                                 * #generateVectorBlur as its
+                                                                 * `data` argument. */
+ this->generateVectorBlur(m_cachedInstance, image, speed, z);
+
+ if (is_image_inflated) {
+ delete image;
+ }
+ delete speed;
+ if (is_z_inflated) {
+ delete z;
+ }
+ }
+
+ const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType());
+ MemoryBuffer buf(m_cachedInstance, num_channels, this->getWidth(), this->getHeight()); /* Wraps
+     * the cached floats so they can be copied with the MemoryBuffer API. Presumably this
+     * constructor does not take ownership of the pointer - confirm in MemoryBuffer. */
+ output->copy_from(&buf, area);
+}
+
void VectorBlurOperation::generateVectorBlur(float *data,
MemoryBuffer *inputImage,
MemoryBuffer *inputSpeed,
diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.h b/source/blender/compositor/operations/COM_VectorBlurOperation.h
index dfcf1fb16f7..c30c150db3c 100644
--- a/source/blender/compositor/operations/COM_VectorBlurOperation.h
+++ b/source/blender/compositor/operations/COM_VectorBlurOperation.h
@@ -26,6 +26,10 @@ namespace blender::compositor {
class VectorBlurOperation : public NodeOperation, public QualityStepHelper {
private:
+ static constexpr int IMAGE_INPUT_INDEX = 0;
+ static constexpr int Z_INPUT_INDEX = 1;
+ static constexpr int SPEED_INPUT_INDEX = 2;
+
/**
* \brief Cached reference to the inputProgram
*/
@@ -68,6 +72,13 @@ class VectorBlurOperation : public NodeOperation, public QualityStepHelper {
ReadBufferOperation *readOperation,
rcti *output) override;
+ void get_area_of_interest(const int input_idx,
+ const rcti &output_area,
+ rcti &r_input_area) override;
+ void update_memory_buffer(MemoryBuffer *output,
+ const rcti &area,
+ Span<MemoryBuffer *> inputs) override;
+
protected:
void generateVectorBlur(float *data,
MemoryBuffer *inputImage,