From 9d7cb5c4a1158266d2f8caa1fc19be2a00fdf101 Mon Sep 17 00:00:00 2001
From: Manuel Castilla <manzanillawork@gmail.com>
Date: Sat, 4 Sep 2021 15:23:28 +0200
Subject: Compositor: Full frame filter nodes

Adds full frame implementation to Anti-Aliasing, Defocus, Denoise,
Despeckle, Dilate/Erode, Directional Blur, Filter, Inpaint and
Vector Blur nodes. The other nodes in the "Filter" sub-menu are
submitted separately.

Part of T88150.

Reviewed By: jbakker

Differential Revision: https://developer.blender.org/D12219
---
 .../blender/compositor/intern/COM_MemoryBuffer.h   |   6 +
 .../COM_ConvertDepthToRadiusOperation.cc           |  27 ++
 .../operations/COM_ConvertDepthToRadiusOperation.h |   8 +-
 .../COM_ConvolutionEdgeFilterOperation.cc          |  77 +++++
 .../COM_ConvolutionEdgeFilterOperation.h           |   4 +
 .../operations/COM_ConvolutionFilterOperation.cc   |  58 ++++
 .../operations/COM_ConvolutionFilterOperation.h    |  13 +-
 .../compositor/operations/COM_DenoiseOperation.cc  | 100 ++++--
 .../compositor/operations/COM_DenoiseOperation.h   |  15 +-
 .../operations/COM_DespeckleOperation.cc           | 107 +++++++
 .../compositor/operations/COM_DespeckleOperation.h |  12 +-
 .../operations/COM_DilateErodeOperation.cc         | 355 ++++++++++++++++++++-
 .../operations/COM_DilateErodeOperation.h          |  40 ++-
 .../operations/COM_DirectionalBlurOperation.cc     |  54 ++++
 .../operations/COM_DirectionalBlurOperation.h      |   9 +-
 .../compositor/operations/COM_InpaintOperation.cc  |  44 +++
 .../compositor/operations/COM_InpaintOperation.h   |   7 +
 .../compositor/operations/COM_SMAAOperation.cc     | 355 +++++++++++++++++++--
 .../compositor/operations/COM_SMAAOperation.h      |  28 +-
 .../operations/COM_VectorBlurOperation.cc          |  46 +++
 .../operations/COM_VectorBlurOperation.h           |  11 +
 21 files changed, 1286 insertions(+), 90 deletions(-)
diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.h b/source/blender/compositor/intern/COM_MemoryBuffer.h
index f3e15c2a495..f730d53acec 100644
--- a/source/blender/compositor/intern/COM_MemoryBuffer.h
+++ b/source/blender/compositor/intern/COM_MemoryBuffer.h
@@ -373,6 +373,12 @@ class MemoryBuffer {
     return this->m_buffer;
   }
 
+  float *release_ownership_buffer()
+  {
+    owns_data_ = false;
+    return this->m_buffer;
+  }
+
   MemoryBuffer *inflate() const;
 
   inline void wrap_pixel(int &x, int &y, MemoryBufferExtend extend_x, MemoryBufferExtend extend_y)
diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
index a9c58b55d73..405ba03abf3 100644
--- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.cc
@@ -116,4 +116,31 @@ void ConvertDepthToRadiusOperation::deinitExecution()
   this->m_inputOperation = nullptr;
 }
 
+void ConvertDepthToRadiusOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                                 const rcti &area,
+                                                                 Span<MemoryBuffer *> inputs)
+{
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    const float z = *it.in(0);
+    if (z == 0.0f) {
+      *it.out = 0.0f;
+      continue;
+    }
+
+    const float inv_z = (1.0f / z);
+
+    /* Bug T6656 part 2b, do not re-scale. */
+#if 0
+    bcrad = 0.5f * fabs(aperture * (dof_sp * (cam_invfdist - iZ) - 1.0f));
+    /* Scale crad back to original maximum and blend:
+     * `crad->rect[px] = bcrad + wts->rect[px] * (scf * crad->rect[px] - bcrad);` */
+#endif
+    const float radius = 0.5f *
+                         fabsf(m_aperture * (m_dof_sp * (m_inverseFocalDistance - inv_z) - 1.0f));
+    /* Bug T6615, clamp the radius to [0, m_maxRadius] (the lower bound is 0, not 1 pixel),
+     * not really a solution, but somewhat mitigates the problem. */
+    *it.out = CLAMPIS(radius, 0.0f, m_maxRadius);
+  }
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
index 1f4e856b128..3d163843d06 100644
--- a/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
+++ b/source/blender/compositor/operations/COM_ConvertDepthToRadiusOperation.h
@@ -19,7 +19,7 @@
 #pragma once
 
 #include "COM_FastGaussianBlurOperation.h"
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 #include "DNA_object_types.h"
 
 namespace blender::compositor {
@@ -28,7 +28,7 @@ namespace blender::compositor {
  * this program converts an input color to an output value.
  * it assumes we are in sRGB color space.
  */
-class ConvertDepthToRadiusOperation : public NodeOperation {
+class ConvertDepthToRadiusOperation : public MultiThreadedOperation {
  private:
   /**
    * Cached reference to the inputProgram
@@ -83,6 +83,10 @@ class ConvertDepthToRadiusOperation : public NodeOperation {
   {
     this->m_blurPostOperation = operation;
   }
+
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
index 5ead300a368..9127a871b04 100644
--- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cc
@@ -95,4 +95,81 @@ void ConvolutionEdgeFilterOperation::executePixel(float output[4], int x, int y,
   output[3] = MAX2(output[3], 0.0f);
 }
 
+void ConvolutionEdgeFilterOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                                  const rcti &area,
+                                                                  Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+  const int last_x = getWidth() - 1;
+  const int last_y = getHeight() - 1;
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    const int left_offset = (it.x == 0) ? 0 : -image->elem_stride;
+    const int right_offset = (it.x == last_x) ? 0 : image->elem_stride;
+    const int down_offset = (it.y == 0) ? 0 : -image->row_stride;
+    const int up_offset = (it.y == last_y) ? 0 : image->row_stride;
+
+    const float *center_color = it.in(IMAGE_INPUT_INDEX);
+    float res1[4] = {0};
+    float res2[4] = {0};
+
+    const float *color = center_color + down_offset + left_offset;
+    madd_v3_v3fl(res1, color, m_filter[0]);
+    copy_v3_v3(res2, res1);
+
+    color = center_color + down_offset;
+    madd_v3_v3fl(res1, color, m_filter[1]);
+    madd_v3_v3fl(res2, color, m_filter[3]);
+
+    color = center_color + down_offset + right_offset;
+    madd_v3_v3fl(res1, color, m_filter[2]);
+    madd_v3_v3fl(res2, color, m_filter[6]);
+
+    color = center_color + left_offset;
+    madd_v3_v3fl(res1, color, m_filter[3]);
+    madd_v3_v3fl(res2, color, m_filter[1]);
+
+    {
+      float rgb_filtered[3];
+      mul_v3_v3fl(rgb_filtered, center_color, m_filter[4]);
+      add_v3_v3(res1, rgb_filtered);
+      add_v3_v3(res2, rgb_filtered);
+    }
+
+    color = center_color + right_offset;
+    madd_v3_v3fl(res1, color, m_filter[5]);
+    madd_v3_v3fl(res2, color, m_filter[7]);
+
+    color = center_color + up_offset + left_offset;
+    madd_v3_v3fl(res1, color, m_filter[6]);
+    madd_v3_v3fl(res2, color, m_filter[2]);
+
+    color = center_color + up_offset;
+    madd_v3_v3fl(res1, color, m_filter[7]);
+    madd_v3_v3fl(res2, color, m_filter[5]);
+
+    {
+      color = center_color + up_offset + right_offset;
+      float rgb_filtered[3];
+      mul_v3_v3fl(rgb_filtered, color, m_filter[8]);
+      add_v3_v3(res1, rgb_filtered);
+      add_v3_v3(res2, rgb_filtered);
+    }
+
+    it.out[0] = sqrt(res1[0] * res1[0] + res2[0] * res2[0]);
+    it.out[1] = sqrt(res1[1] * res1[1] + res2[1] * res2[1]);
+    it.out[2] = sqrt(res1[2] * res1[2] + res2[2] * res2[2]);
+
+    const float factor = *it.in(FACTOR_INPUT_INDEX);
+    const float m_factor = 1.0f - factor;
+    it.out[0] = it.out[0] * factor + center_color[0] * m_factor;
+    it.out[1] = it.out[1] * factor + center_color[1] * m_factor;
+    it.out[2] = it.out[2] * factor + center_color[2] * m_factor;
+
+    it.out[3] = center_color[3];
+
+    /* Make sure we don't return negative color. */
+    CLAMP4_MIN(it.out, 0.0f);
+  }
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
index 319b424bd4a..bd38e27165a 100644
--- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
+++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.h
@@ -25,6 +25,10 @@ namespace blender::compositor {
 class ConvolutionEdgeFilterOperation : public ConvolutionFilterOperation {
  public:
   void executePixel(float output[4], int x, int y, void *data) override;
+
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
index 72cbbf4283a..11a077229fd 100644
--- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
+++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cc
@@ -127,4 +127,62 @@ bool ConvolutionFilterOperation::determineDependingAreaOfInterest(
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+void ConvolutionFilterOperation::get_area_of_interest(const int input_idx,
+                                                      const rcti &output_area,
+                                                      rcti &r_input_area)
+{
+  switch (input_idx) {
+    case IMAGE_INPUT_INDEX: {
+      const int add_x = (m_filterWidth - 1) / 2 + 1;
+      const int add_y = (m_filterHeight - 1) / 2 + 1;
+      r_input_area.xmin = output_area.xmin - add_x;
+      r_input_area.xmax = output_area.xmax + add_x;
+      r_input_area.ymin = output_area.ymin - add_y;
+      r_input_area.ymax = output_area.ymax + add_y;
+      break;
+    }
+    case FACTOR_INPUT_INDEX: {
+      r_input_area = output_area;
+      break;
+    }
+  }
+}
+
+void ConvolutionFilterOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                              const rcti &area,
+                                                              Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+  const int last_x = getWidth() - 1;
+  const int last_y = getHeight() - 1;
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    const int left_offset = (it.x == 0) ? 0 : -image->elem_stride;
+    const int right_offset = (it.x == last_x) ? 0 : image->elem_stride;
+    const int down_offset = (it.y == 0) ? 0 : -image->row_stride;
+    const int up_offset = (it.y == last_y) ? 0 : image->row_stride;
+
+    const float *center_color = it.in(IMAGE_INPUT_INDEX);
+    zero_v4(it.out);
+    madd_v4_v4fl(it.out, center_color + down_offset + left_offset, m_filter[0]);
+    madd_v4_v4fl(it.out, center_color + down_offset, m_filter[1]);
+    madd_v4_v4fl(it.out, center_color + down_offset + right_offset, m_filter[2]);
+    madd_v4_v4fl(it.out, center_color + left_offset, m_filter[3]);
+    madd_v4_v4fl(it.out, center_color, m_filter[4]);
+    madd_v4_v4fl(it.out, center_color + right_offset, m_filter[5]);
+    madd_v4_v4fl(it.out, center_color + up_offset + left_offset, m_filter[6]);
+    madd_v4_v4fl(it.out, center_color + up_offset, m_filter[7]);
+    madd_v4_v4fl(it.out, center_color + up_offset + right_offset, m_filter[8]);
+
+    const float factor = *it.in(FACTOR_INPUT_INDEX);
+    const float m_factor = 1.0f - factor;
+    it.out[0] = it.out[0] * factor + center_color[0] * m_factor;
+    it.out[1] = it.out[1] * factor + center_color[1] * m_factor;
+    it.out[2] = it.out[2] * factor + center_color[2] * m_factor;
+    it.out[3] = it.out[3] * factor + center_color[3] * m_factor;
+
+    /* Make sure we don't return negative color. */
+    CLAMP4_MIN(it.out, 0.0f);
+  }
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
index 16dee502929..7e12c7faa5c 100644
--- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
+++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.h
@@ -18,11 +18,15 @@
 
 #pragma once
 
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 
 namespace blender::compositor {
 
-class ConvolutionFilterOperation : public NodeOperation {
+class ConvolutionFilterOperation : public MultiThreadedOperation {
+ protected:
+  static constexpr int IMAGE_INPUT_INDEX = 0;
+  static constexpr int FACTOR_INPUT_INDEX = 1;
+
  private:
   int m_filterWidth;
   int m_filterHeight;
@@ -43,6 +47,11 @@ class ConvolutionFilterOperation : public NodeOperation {
 
   void initExecution() override;
   void deinitExecution() override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+  virtual void update_memory_buffer_partial(MemoryBuffer *output,
+                                            const rcti &area,
+                                            Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.cc b/source/blender/compositor/operations/COM_DenoiseOperation.cc
index ec11ad4d69a..e7f2d5a740a 100644
--- a/source/blender/compositor/operations/COM_DenoiseOperation.cc
+++ b/source/blender/compositor/operations/COM_DenoiseOperation.cc
@@ -35,6 +35,8 @@ DenoiseOperation::DenoiseOperation()
   this->addInputSocket(DataType::Color);
   this->addOutputSocket(DataType::Color);
   this->m_settings = nullptr;
+  flags.is_fullframe_operation = true;
+  output_rendered_ = false;
 }
 void DenoiseOperation::initExecution()
 {
@@ -63,8 +65,7 @@ MemoryBuffer *DenoiseOperation::createMemoryBuffer(rcti *rect2)
   rect.xmax = getWidth();
   rect.ymax = getHeight();
   MemoryBuffer *result = new MemoryBuffer(DataType::Color, rect);
-  float *data = result->getBuffer();
-  this->generateDenoise(data, tileColor, tileNormal, tileAlbedo, this->m_settings);
+  this->generateDenoise(result, tileColor, tileNormal, tileAlbedo, this->m_settings);
   return result;
 }
 
@@ -84,23 +85,33 @@ bool DenoiseOperation::determineDependingAreaOfInterest(rcti * /*input*/,
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
-void DenoiseOperation::generateDenoise(float *data,
-                                       MemoryBuffer *inputTileColor,
-                                       MemoryBuffer *inputTileNormal,
-                                       MemoryBuffer *inputTileAlbedo,
+void DenoiseOperation::generateDenoise(MemoryBuffer *output,
+                                       MemoryBuffer *input_color,
+                                       MemoryBuffer *input_normal,
+                                       MemoryBuffer *input_albedo,
                                        NodeDenoise *settings)
 {
-  float *inputBufferColor = inputTileColor->getBuffer();
-  BLI_assert(inputBufferColor);
-  if (!inputBufferColor) {
+  BLI_assert(input_color->getBuffer());
+  if (!input_color->getBuffer()) {
     return;
   }
+
 #ifdef WITH_OPENIMAGEDENOISE
   /* Always supported through Accelerate framework BNNS on macOS. */
 #  ifndef __APPLE__
   if (BLI_cpu_support_sse41())
 #  endif
   {
+    /* OpenImageDenoise needs full buffers. */
+    MemoryBuffer *buf_color = input_color->is_a_single_elem() ? input_color->inflate() :
+                                                                input_color;
+    MemoryBuffer *buf_normal = input_normal && input_normal->is_a_single_elem() ?
+                                   input_normal->inflate() :
+                                   input_normal;
+    MemoryBuffer *buf_albedo = input_albedo && input_albedo->is_a_single_elem() ?
+                                   input_albedo->inflate() :
+                                   input_albedo;
+
     /* Since it's memory intensive, it's better to run only one instance of OIDN at a time.
      * OpenImageDenoise is multithreaded internally and should use all available cores nonetheless.
      */
@@ -111,35 +122,35 @@ void DenoiseOperation::generateDenoise(float *data,
 
     oidn::FilterRef filter = device.newFilter("RT");
     filter.setImage("color",
-                    inputBufferColor,
+                    buf_color->getBuffer(),
                     oidn::Format::Float3,
-                    inputTileColor->getWidth(),
-                    inputTileColor->getHeight(),
+                    buf_color->getWidth(),
+                    buf_color->getHeight(),
                     0,
                     sizeof(float[4]));
-    if (inputTileNormal && inputTileNormal->getBuffer()) {
+    if (buf_normal && buf_normal->getBuffer()) {
       filter.setImage("normal",
-                      inputTileNormal->getBuffer(),
+                      buf_normal->getBuffer(),
                       oidn::Format::Float3,
-                      inputTileNormal->getWidth(),
-                      inputTileNormal->getHeight(),
+                      buf_normal->getWidth(),
+                      buf_normal->getHeight(),
                       0,
                       sizeof(float[3]));
     }
-    if (inputTileAlbedo && inputTileAlbedo->getBuffer()) {
+    if (buf_albedo && buf_albedo->getBuffer()) {
       filter.setImage("albedo",
-                      inputTileAlbedo->getBuffer(),
+                      buf_albedo->getBuffer(),
                       oidn::Format::Float3,
-                      inputTileAlbedo->getWidth(),
-                      inputTileAlbedo->getHeight(),
+                      buf_albedo->getWidth(),
+                      buf_albedo->getHeight(),
                       0,
                       sizeof(float[4]));
     }
     filter.setImage("output",
-                    data,
+                    output->getBuffer(),
                     oidn::Format::Float3,
-                    inputTileColor->getWidth(),
-                    inputTileColor->getHeight(),
+                    buf_color->getWidth(),
+                    buf_color->getHeight(),
                     0,
                     sizeof(float[4]));
 
@@ -153,19 +164,46 @@ void DenoiseOperation::generateDenoise(float *data,
     filter.execute();
     BLI_mutex_unlock(&oidn_lock);
 
-    /* copy the alpha channel, OpenImageDenoise currently only supports RGB */
-    size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight();
-    for (size_t i = 0; i < numPixels; i++) {
-      data[i * 4 + 3] = inputBufferColor[i * 4 + 3];
+    /* Copy the alpha channel, OpenImageDenoise currently only supports RGB. */
+    output->copy_from(input_color, input_color->get_rect(), 3, COM_DATA_TYPE_VALUE_CHANNELS, 3);
+
+    /* Delete inflated buffers. */
+    if (input_color->is_a_single_elem()) {
+      delete buf_color;
+    }
+    if (input_normal && input_normal->is_a_single_elem()) {
+      delete buf_normal;
     }
+    if (input_albedo && input_albedo->is_a_single_elem()) {
+      delete buf_albedo;
+    }
+
     return;
   }
 #endif
   /* If built without OIDN or running on an unsupported CPU, just pass through. */
-  UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings);
-  ::memcpy(data,
-           inputBufferColor,
-           sizeof(float[4]) * inputTileColor->getWidth() * inputTileColor->getHeight());
+  UNUSED_VARS(input_albedo, input_normal, settings);
+  output->copy_from(input_color, input_color->get_rect());
+}
+
+void DenoiseOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                            const rcti &UNUSED(output_area),
+                                            rcti &r_input_area)
+{
+  r_input_area.xmin = 0;
+  r_input_area.xmax = this->getWidth();
+  r_input_area.ymin = 0;
+  r_input_area.ymax = this->getHeight();
+}
+
+void DenoiseOperation::update_memory_buffer(MemoryBuffer *output,
+                                            const rcti &UNUSED(area),
+                                            Span<MemoryBuffer *> inputs)
+{
+  if (!output_rendered_) {
+    this->generateDenoise(output, inputs[0], inputs[1], inputs[2], m_settings);
+    output_rendered_ = true;
+  }
 }
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.h b/source/blender/compositor/operations/COM_DenoiseOperation.h
index a9298c17e92..48209c3eacf 100644
--- a/source/blender/compositor/operations/COM_DenoiseOperation.h
+++ b/source/blender/compositor/operations/COM_DenoiseOperation.h
@@ -37,6 +37,8 @@ class DenoiseOperation : public SingleThreadedOperation {
    */
   NodeDenoise *m_settings;
 
+  bool output_rendered_;
+
  public:
   DenoiseOperation();
   /**
@@ -57,11 +59,16 @@ class DenoiseOperation : public SingleThreadedOperation {
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
 
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
+
  protected:
-  void generateDenoise(float *data,
-                       MemoryBuffer *inputTileColor,
-                       MemoryBuffer *inputTileNormal,
-                       MemoryBuffer *inputTileAlbedo,
+  void generateDenoise(MemoryBuffer *output,
+                       MemoryBuffer *input_color,
+                       MemoryBuffer *input_normal,
+                       MemoryBuffer *input_albedo,
                        NodeDenoise *settings);
 
   MemoryBuffer *createMemoryBuffer(rcti *rect) override;
diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.cc b/source/blender/compositor/operations/COM_DespeckleOperation.cc
index fc8778c7d2e..19bd7b2af6f 100644
--- a/source/blender/compositor/operations/COM_DespeckleOperation.cc
+++ b/source/blender/compositor/operations/COM_DespeckleOperation.cc
@@ -127,6 +127,11 @@ void DespeckleOperation::executePixel(float output[4], int x, int y, void * /*da
   else {
     copy_v4_v4(output, color_org);
   }
+
+#undef TOT_DIV_ONE
+#undef TOT_DIV_CNR
+#undef WTOT
+#undef COLOR_ADD
 }
 
 bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input,
@@ -144,4 +149,106 @@ bool DespeckleOperation::determineDependingAreaOfInterest(rcti *input,
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+void DespeckleOperation::get_area_of_interest(const int input_idx,
+                                              const rcti &output_area,
+                                              rcti &r_input_area)
+{
+  switch (input_idx) {
+    case IMAGE_INPUT_INDEX: {
+      const int add_x = 2;  //(this->m_filterWidth - 1) / 2 + 1;
+      const int add_y = 2;  //(this->m_filterHeight - 1) / 2 + 1;
+      r_input_area.xmin = output_area.xmin - add_x;
+      r_input_area.xmax = output_area.xmax + add_x;
+      r_input_area.ymin = output_area.ymin - add_y;
+      r_input_area.ymax = output_area.ymax + add_y;
+      break;
+    }
+    case FACTOR_INPUT_INDEX: {
+      r_input_area = output_area;
+      break;
+    }
+  }
+}
+
+void DespeckleOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                      const rcti &area,
+                                                      Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
+  const int last_x = getWidth() - 1;
+  const int last_y = getHeight() - 1;
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    const int x1 = MAX2(it.x - 1, 0);
+    const int x2 = it.x;
+    const int x3 = MIN2(it.x + 1, last_x);
+    const int y1 = MAX2(it.y - 1, 0);
+    const int y2 = it.y;
+    const int y3 = MIN2(it.y + 1, last_y);
+
+    float w = 0.0f;
+    const float *color_org = it.in(IMAGE_INPUT_INDEX);
+    float color_mid[4];
+    float color_mid_ok[4];
+    const float *in1 = nullptr;
+
+#define TOT_DIV_ONE 1.0f
+#define TOT_DIV_CNR (float)M_SQRT1_2
+
+#define WTOT (TOT_DIV_ONE * 4 + TOT_DIV_CNR * 4)
+
+#define COLOR_ADD(fac) \
+  { \
+    madd_v4_v4fl(color_mid, in1, fac); \
+    if (color_diff(in1, color_org, m_threshold)) { \
+      w += fac; \
+      madd_v4_v4fl(color_mid_ok, in1, fac); \
+    } \
+  }
+
+    zero_v4(color_mid);
+    zero_v4(color_mid_ok);
+
+    in1 = image->get_elem(x1, y1);
+    COLOR_ADD(TOT_DIV_CNR)
+    in1 = image->get_elem(x2, y1);
+    COLOR_ADD(TOT_DIV_ONE)
+    in1 = image->get_elem(x3, y1);
+    COLOR_ADD(TOT_DIV_CNR)
+    in1 = image->get_elem(x1, y2);
+    COLOR_ADD(TOT_DIV_ONE)
+
+#if 0
+  const float* in2 = image->get_elem(x2, y2);
+  madd_v4_v4fl(color_mid, in2, this->m_filter[4]);
+#endif
+
+    in1 = image->get_elem(x3, y2);
+    COLOR_ADD(TOT_DIV_ONE)
+    in1 = image->get_elem(x1, y3);
+    COLOR_ADD(TOT_DIV_CNR)
+    in1 = image->get_elem(x2, y3);
+    COLOR_ADD(TOT_DIV_ONE)
+    in1 = image->get_elem(x3, y3);
+    COLOR_ADD(TOT_DIV_CNR)
+
+    mul_v4_fl(color_mid, 1.0f / (4.0f + (4.0f * (float)M_SQRT1_2)));
+    // mul_v4_fl(color_mid, 1.0f / w);
+
+    if ((w != 0.0f) && ((w / WTOT) > (m_threshold_neighbor)) &&
+        color_diff(color_mid, color_org, m_threshold)) {
+      const float factor = *it.in(FACTOR_INPUT_INDEX);
+      mul_v4_fl(color_mid_ok, 1.0f / w);
+      interp_v4_v4v4(it.out, color_org, color_mid_ok, factor);
+    }
+    else {
+      copy_v4_v4(it.out, color_org);
+    }
+
+#undef TOT_DIV_ONE
+#undef TOT_DIV_CNR
+#undef WTOT
+#undef COLOR_ADD
+  }
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DespeckleOperation.h b/source/blender/compositor/operations/COM_DespeckleOperation.h
index e8d3461d2ec..70d6c2227f4 100644
--- a/source/blender/compositor/operations/COM_DespeckleOperation.h
+++ b/source/blender/compositor/operations/COM_DespeckleOperation.h
@@ -18,12 +18,15 @@
 
 #pragma once
 
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 
 namespace blender::compositor {
 
-class DespeckleOperation : public NodeOperation {
+class DespeckleOperation : public MultiThreadedOperation {
  private:
+  constexpr static int IMAGE_INPUT_INDEX = 0;
+  constexpr static int FACTOR_INPUT_INDEX = 1;
+
   float m_threshold;
   float m_threshold_neighbor;
 
@@ -52,6 +55,11 @@ class DespeckleOperation : public NodeOperation {
 
   void initExecution() override;
   void deinitExecution() override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.cc b/source/blender/compositor/operations/COM_DilateErodeOperation.cc
index c459d09f02c..28b40021cd9 100644
--- a/source/blender/compositor/operations/COM_DilateErodeOperation.cc
+++ b/source/blender/compositor/operations/COM_DilateErodeOperation.cc
@@ -35,9 +35,9 @@ DilateErodeThresholdOperation::DilateErodeThresholdOperation()
   this->m__switch = 0.5f;
   this->m_distance = 0.0f;
 }
-void DilateErodeThresholdOperation::initExecution()
+
+void DilateErodeThresholdOperation::init_data()
 {
-  this->m_inputProgram = this->getInputSocketReader(0);
   if (this->m_distance < 0.0f) {
     this->m_scope = -this->m_distance + this->m_inset;
   }
@@ -54,6 +54,11 @@ void DilateErodeThresholdOperation::initExecution()
   }
 }
 
+void DilateErodeThresholdOperation::initExecution()
+{
+  this->m_inputProgram = this->getInputSocketReader(0);
+}
+
 void *DilateErodeThresholdOperation::initializeTileData(rcti * /*rect*/)
 {
   void *buffer = this->m_inputProgram->initializeTileData(nullptr);
@@ -160,6 +165,112 @@ bool DilateErodeThresholdOperation::determineDependingAreaOfInterest(
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+void DilateErodeThresholdOperation::get_area_of_interest(const int input_idx,
+                                                         const rcti &output_area,
+                                                         rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  r_input_area.xmin = output_area.xmin - m_scope;
+  r_input_area.xmax = output_area.xmax + m_scope;
+  r_input_area.ymin = output_area.ymin - m_scope;
+  r_input_area.ymax = output_area.ymax + m_scope;
+}
+
+struct DilateErodeThresholdOperation::PixelData {
+  int x;
+  int y;
+  int xmin;
+  int xmax;
+  int ymin;
+  int ymax;
+  const float *elem;
+  float distance;
+  int elem_stride;
+  int row_stride;
+  /** Switch. */
+  float sw;
+};
+
+template<template<typename> typename TCompare>
+static float get_min_distance(DilateErodeThresholdOperation::PixelData &p)
+{
+  /* TODO(manzanilla): bad performance, generate a table with relative offsets on operation
+   * initialization to loop from less to greater distance and break as soon as #compare is
+   * true. */
+  const TCompare compare;
+  float min_dist = p.distance;
+  const float *row = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride +
+                     ((intptr_t)p.xmin - p.x) * p.elem_stride;
+  for (int yi = p.ymin; yi < p.ymax; yi++) {
+    const float dy = yi - p.y;
+    const float dist_y = dy * dy;
+    const float *elem = row;
+    for (int xi = p.xmin; xi < p.xmax; xi++) {
+      if (compare(*elem, p.sw)) {
+        const float dx = xi - p.x;
+        const float dist = dx * dx + dist_y;
+        min_dist = MIN2(min_dist, dist);
+      }
+      elem += p.elem_stride;
+    }
+    row += p.row_stride;
+  }
+  return min_dist;
+}
+
+void DilateErodeThresholdOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                                 const rcti &area,
+                                                                 Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *input = inputs[0];
+  const rcti &input_rect = input->get_rect();
+  const float rd = m_scope * m_scope;
+  const float inset = m_inset;
+
+  PixelData p;
+  p.sw = m__switch;
+  p.distance = rd * 2;
+  p.elem_stride = input->elem_stride;
+  p.row_stride = input->row_stride;
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    p.x = it.x;
+    p.y = it.y;
+    p.xmin = MAX2(p.x - m_scope, input_rect.xmin);
+    p.ymin = MAX2(p.y - m_scope, input_rect.ymin);
+    p.xmax = MIN2(p.x + m_scope, input_rect.xmax);
+    p.ymax = MIN2(p.y + m_scope, input_rect.ymax);
+    p.elem = it.in(0);
+
+    float pixel_value;
+    if (*p.elem > p.sw) {
+      pixel_value = -sqrtf(get_min_distance<std::less>(p));
+    }
+    else {
+      pixel_value = sqrtf(get_min_distance<std::greater>(p));
+    }
+
+    if (m_distance > 0.0f) {
+      const float delta = m_distance - pixel_value;
+      if (delta >= 0.0f) {
+        *it.out = delta >= inset ? 1.0f : delta / inset;
+      }
+      else {
+        *it.out = 0.0f;
+      }
+    }
+    else {
+      const float delta = -m_distance + pixel_value;
+      if (delta < 0.0f) {
+        *it.out = delta < -inset ? 1.0f : (-delta) / inset;
+      }
+      else {
+        *it.out = 0.0f;
+      }
+    }
+  }
+}
+
 /* Dilate Distance. */
 DilateDistanceOperation::DilateDistanceOperation()
 {
@@ -170,15 +281,20 @@ DilateDistanceOperation::DilateDistanceOperation()
   flags.complex = true;
   flags.open_cl = true;
 }
-void DilateDistanceOperation::initExecution()
+
+void DilateDistanceOperation::init_data()
 {
-  this->m_inputProgram = this->getInputSocketReader(0);
   this->m_scope = this->m_distance;
   if (this->m_scope < 3) {
     this->m_scope = 3;
   }
 }
 
+void DilateDistanceOperation::initExecution()
+{
+  m_inputProgram = getInputSocketReader(0); /* Reader later used by #initializeTileData. */
+}
+
 void *DilateDistanceOperation::initializeTileData(rcti * /*rect*/)
 {
   void *buffer = this->m_inputProgram->initializeTileData(nullptr);
@@ -258,6 +374,92 @@ void DilateDistanceOperation::executeOpenCL(OpenCLDevice *device,
   device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this);
 }
 
+/* Full-frame: the input area is the output area padded by #m_scope pixels on every side. */
+void DilateDistanceOperation::get_area_of_interest(
+    const int input_idx, const rcti &output_area, rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  r_input_area.xmin = output_area.xmin - m_scope;
+  r_input_area.ymin = output_area.ymin - m_scope;
+  r_input_area.xmax = output_area.xmax + m_scope;
+  r_input_area.ymax = output_area.ymax + m_scope;
+}
+
+struct DilateDistanceOperation::PixelData {
+  int x; /* Coordinates of the pixel being computed (set by #update). */
+  int y;
+  int xmin; /* Search window around (x, y), clamped to #input_rect (set by #update). */
+  int xmax;
+  int ymin;
+  int ymax;
+  const float *elem; /* Input element for the iterator's current position (`it.in(0)`). */
+  float min_distance; /* `distance * distance`, as passed to the constructor. */
+  int scope; /* Half-size of the search window, in pixels. */
+  int elem_stride; /* Strides of the input buffer, copied at construction. */
+  int row_stride;
+  const rcti &input_rect; /* Rect of the input buffer; held by reference, not copied. */
+
+  PixelData(MemoryBuffer *input, const int distance, const int scope)
+      : min_distance(distance * distance),
+        scope(scope),
+        elem_stride(input->elem_stride),
+        row_stride(input->row_stride),
+        input_rect(input->get_rect())
+  {
+  }
+
+  void update(BuffersIterator<float> &it)
+  {
+    x = it.x;
+    y = it.y;
+    xmin = MAX2(x - scope, input_rect.xmin);
+    ymin = MAX2(y - scope, input_rect.ymin);
+    xmax = MIN2(x + scope, input_rect.xmax);
+    ymax = MIN2(y + scope, input_rect.ymax);
+    elem = it.in(0);
+  }
+};
+
+/* Folds `start_value` with every input element of the search window whose squared offset from
+ * the pixel is at most `p.min_distance`, keeping whichever value `TCompare` prefers. */
+template<template<typename> typename TCompare>
+static float get_distance_value(DilateDistanceOperation::PixelData &p, const float start_value)
+{
+  /* TODO(manzanilla): bad performance, only loop elements within minimum distance removing
+   * coordinates and conditional if `dist <= min_dist`. May need to generate a table of offsets. */
+  const TCompare compare;
+  const float max_dist_sq = p.min_distance;
+  float best = start_value;
+  const float *row_start = p.elem + ((intptr_t)p.ymin - p.y) * p.row_stride +
+                           ((intptr_t)p.xmin - p.x) * p.elem_stride;
+  for (int yi = p.ymin; yi < p.ymax; yi++, row_start += p.row_stride) {
+    const float dy = yi - p.y;
+    const float dy_sq = dy * dy;
+    const float *elem = row_start;
+    for (int xi = p.xmin; xi < p.xmax; xi++, elem += p.elem_stride) {
+      const float dx = xi - p.x;
+      const float dist_sq = dx * dx + dy_sq;
+      if (dist_sq <= max_dist_sq && compare(*elem, best)) {
+        best = *elem;
+      }
+    }
+  }
+
+  return best;
+}
+
+/* Full-frame dilate: each output is the `std::greater` selection over the window, from 0. */
+void DilateDistanceOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &area, Span<MemoryBuffer *> inputs)
+{
+  PixelData pixel(inputs[0], m_distance, m_scope);
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    pixel.update(it);
+    *it.out = get_distance_value<std::greater>(pixel, 0.0f);
+  }
+}
+
 /* Erode Distance */
 ErodeDistanceOperation::ErodeDistanceOperation() : DilateDistanceOperation()
 {
@@ -318,6 +520,17 @@ void ErodeDistanceOperation::executeOpenCL(OpenCLDevice *device,
   device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this);
 }
 
+/* Full-frame erode: each output is the `std::less` selection over the window, from 1. */
+void ErodeDistanceOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &area, Span<MemoryBuffer *> inputs)
+{
+  PixelData pixel(inputs[0], m_distance, m_scope);
+  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
+    pixel.update(it);
+    *it.out = get_distance_value<std::less>(pixel, 1.0f);
+  }
+}
+
 /* Dilate step */
 DilateStepOperation::DilateStepOperation()
 {
@@ -475,6 +688,126 @@ bool DilateStepOperation::determineDependingAreaOfInterest(rcti *input,
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+/* Full-frame: the input area is the output area padded by #m_iterations pixels on every side. */
+void DilateStepOperation::get_area_of_interest(
+    const int input_idx, const rcti &output_area, rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  r_input_area.xmin = output_area.xmin - m_iterations;
+  r_input_area.ymin = output_area.ymin - m_iterations;
+  r_input_area.xmax = output_area.xmax + m_iterations;
+  r_input_area.ymax = output_area.ymax + m_iterations;
+}
+
+/* Separable dilate/erode of `area` (horizontal pass into a temporary, then vertical pass).
+ * `TCompareSelector` combines two values; `compare_min_value` pads the scan-line buffer. */
+template<typename TCompareSelector>
+static void step_update_memory_buffer(MemoryBuffer *output,
+                                      const MemoryBuffer *input,
+                                      const rcti &area,
+                                      const int num_iterations,
+                                      const float compare_min_value)
+{
+  TCompareSelector selector;
+  const int width = output->getWidth();
+  const int height = output->getHeight();
+  const int half_window = num_iterations;
+  const int window = half_window * 2 + 1;
+
+  const int xmin = MAX2(0, area.xmin - half_window);
+  const int ymin = MAX2(0, area.ymin - half_window);
+  const int xmax = MIN2(width, area.xmax + half_window);
+  const int ymax = MIN2(height, area.ymax + half_window);
+
+  const int bwidth = area.xmax - area.xmin;
+  const int bheight = area.ymax - area.ymin;
+
+  /* NOTE: #result has area width, but new height.
+   * We have to calculate the additional rows in the first pass,
+   * to have valid data available for the second pass. */
+  rcti result_area;
+  BLI_rcti_init(&result_area, area.xmin, area.xmax, ymin, ymax);
+  MemoryBuffer result(DataType::Value, result_area);
+
+  /* #temp holds the running selections for every step in the algorithm, #buf holds a single
+   * row or column of input values, padded with #compare_min_value to simplify the logic at
+   * the borders. */
+  float *temp = (float *)MEM_mallocN(sizeof(float) * (2 * window - 1), "dilate erode temp");
+  float *buf = (float *)MEM_mallocN(sizeof(float) * (MAX2(bwidth, bheight) + 5 * half_window),
+                                    "dilate erode buf");
+
+  /* The following is based on the van Herk/Gil-Werman algorithm for morphology operations. */
+  /* First pass, horizontal dilate/erode. */
+  for (int y = ymin; y < ymax; y++) {
+    for (int x = 0; x < bwidth + 5 * half_window; x++) {
+      buf[x] = compare_min_value;
+    }
+    for (int x = xmin; x < xmax; x++) {
+      buf[x - area.xmin + window - 1] = input->get_value(x, y, 0);
+    }
+
+    for (int i = 0; i < (bwidth + 3 * half_window) / window; i++) {
+      int start = (i + 1) * window - 1;
+
+      temp[window - 1] = buf[start];
+      for (int x = 1; x < window; x++) {
+        temp[window - 1 - x] = selector(temp[window - x], buf[start - x]);
+        temp[window - 1 + x] = selector(temp[window + x - 2], buf[start + x]);
+      }
+
+      start = half_window + (i - 1) * window + 1;
+      for (int x = -MIN2(0, start); x < window - MAX2(0, start + window - bwidth); x++) {
+        result.get_value(start + x + area.xmin, y, 0) = selector(temp[x], temp[x + window - 1]);
+      }
+    }
+  }
+
+  /* Second pass, vertical dilate/erode. #x is absolute, matching #result's coordinates. */
+  for (int x = area.xmin; x < area.xmax; x++) {
+    for (int y = 0; y < bheight + 5 * half_window; y++) {
+      buf[y] = compare_min_value;
+    }
+    for (int y = ymin; y < ymax; y++) {
+      buf[y - area.ymin + window - 1] = result.get_value(x, y, 0);
+    }
+
+    for (int i = 0; i < (bheight + 3 * half_window) / window; i++) {
+      int start = (i + 1) * window - 1;
+
+      temp[window - 1] = buf[start];
+      for (int y = 1; y < window; y++) {
+        temp[window - 1 - y] = selector(temp[window - y], buf[start - y]);
+        temp[window - 1 + y] = selector(temp[window + y - 2], buf[start + y]);
+      }
+
+      start = half_window + (i - 1) * window + 1;
+      for (int y = -MIN2(0, start); y < window - MAX2(0, start + window - bheight); y++) {
+        result.get_value(x, y + start + area.ymin, 0) = selector(temp[y], temp[y + window - 1]);
+      }
+    }
+  }
+
+  MEM_freeN(temp);
+  MEM_freeN(buf);
+
+  output->copy_from(&result, area);
+}
+
+struct Max2Selector {
+  float operator()(const float a, const float b) const
+  {
+    return (a > b) ? a : b; /* Same selection as `MAX2(a, b)`. */
+  }
+};
+
+/* Full-frame dilate: runs the shared step pass with the max selector, padded by -FLT_MAX. */
+void DilateStepOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &area, Span<MemoryBuffer *> inputs)
+{
+  step_update_memory_buffer<Max2Selector>(output, inputs[0], area, m_iterations, -FLT_MAX);
+}
+
 /* Erode step */
 ErodeStepOperation::ErodeStepOperation() : DilateStepOperation()
 {
@@ -571,4 +904,18 @@ void *ErodeStepOperation::initializeTileData(rcti *rect)
   return result;
 }
 
+struct Min2Selector {
+  float operator()(const float a, const float b) const
+  {
+    return (a < b) ? a : b; /* Same selection as `MIN2(a, b)`. */
+  }
+};
+
+/* Full-frame erode: runs the shared step pass with the min selector, padded by FLT_MAX. */
+void ErodeStepOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &area, Span<MemoryBuffer *> inputs)
+{
+  step_update_memory_buffer<Min2Selector>(output, inputs[0], area, m_iterations, FLT_MAX);
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.h b/source/blender/compositor/operations/COM_DilateErodeOperation.h
index a489e293e8e..9c32a5ac1fd 100644
--- a/source/blender/compositor/operations/COM_DilateErodeOperation.h
+++ b/source/blender/compositor/operations/COM_DilateErodeOperation.h
@@ -18,11 +18,14 @@
 
 #pragma once
 
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 
 namespace blender::compositor {
 
-class DilateErodeThresholdOperation : public NodeOperation {
+class DilateErodeThresholdOperation : public MultiThreadedOperation {
+ public:
+  struct PixelData;
+
  private:
   /**
    * Cached reference to the inputProgram
@@ -47,6 +50,7 @@ class DilateErodeThresholdOperation : public NodeOperation {
    */
   void executePixel(float output[4], int x, int y, void *data) override;
 
+  void init_data() override;
   /**
    * Initialize the execution
    */
@@ -74,10 +78,17 @@ class DilateErodeThresholdOperation : public NodeOperation {
   bool determineDependingAreaOfInterest(rcti *input,
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
-class DilateDistanceOperation : public NodeOperation {
- private:
+class DilateDistanceOperation : public MultiThreadedOperation {
+ public:
+  struct PixelData;
+
  protected:
   /**
    * Cached reference to the inputProgram
@@ -94,6 +105,7 @@ class DilateDistanceOperation : public NodeOperation {
    */
   void executePixel(float output[4], int x, int y, void *data) override;
 
+  void init_data() override;
   /**
    * Initialize the execution
    */
@@ -119,7 +131,13 @@ class DilateDistanceOperation : public NodeOperation {
                      MemoryBuffer **inputMemoryBuffers,
                      std::list<cl_mem> *clMemToCleanUp,
                      std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+  virtual void update_memory_buffer_partial(MemoryBuffer *output,
+                                            const rcti &area,
+                                            Span<MemoryBuffer *> inputs) override;
 };
+
 class ErodeDistanceOperation : public DilateDistanceOperation {
  public:
   ErodeDistanceOperation();
@@ -135,9 +153,13 @@ class ErodeDistanceOperation : public DilateDistanceOperation {
                      MemoryBuffer **inputMemoryBuffers,
                      std::list<cl_mem> *clMemToCleanUp,
                      std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
-class DilateStepOperation : public NodeOperation {
+class DilateStepOperation : public MultiThreadedOperation {
  protected:
   /**
    * Cached reference to the inputProgram
@@ -174,6 +196,11 @@ class DilateStepOperation : public NodeOperation {
   bool determineDependingAreaOfInterest(rcti *input,
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) final;
+  virtual void update_memory_buffer_partial(MemoryBuffer *output,
+                                            const rcti &area,
+                                            Span<MemoryBuffer *> inputs) override;
 };
 
 class ErodeStepOperation : public DilateStepOperation {
@@ -181,6 +208,9 @@ class ErodeStepOperation : public DilateStepOperation {
   ErodeStepOperation();
 
   void *initializeTileData(rcti *rect) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
index 97bdc25af3b..102025ed915 100644
--- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
+++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.cc
@@ -146,4 +146,58 @@ bool DirectionalBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+/* The requested output area is ignored: the full operation canvas is always required. */
+void DirectionalBlurOperation::get_area_of_interest(
+    const int input_idx, const rcti &UNUSED(output_area), rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  r_input_area.xmin = 0;
+  r_input_area.ymin = 0;
+  r_input_area.xmax = getWidth();
+  r_input_area.ymax = getHeight();
+}
+
+/* Full-frame path: averages each pixel with `iterations` samples read through an accumulating
+ * translate/rotate/scale transform around the blur center. */
+void DirectionalBlurOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                            const rcti &area,
+                                                            Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *input = inputs[0];
+  const int iterations = pow(2.0f, this->m_data->iter);
+  const float weight = 1.0f / (iterations + 1);
+  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
+    const int x = it.x;
+    const int y = it.y;
+    float sum[4];
+    input->read_elem_bilinear(x, y, sum);
+
+    /* TODO(manzanilla): Many values used on iterations can be calculated beforehand. Create a
+     * table on operation initialization. */
+    float tx = m_tx;
+    float ty = m_ty;
+    float scale = m_sc;
+    float rot = m_rot;
+    for (int i = 0; i < iterations; i++) {
+      const float cos_r = cosf(rot);
+      const float sin_r = sinf(rot);
+      const float inv_scale = 1.0f / (1.0f + scale);
+      const float u = inv_scale * (x - m_center_x_pix) + tx;
+      const float v = inv_scale * (y - m_center_y_pix) + ty;
+      float tap[4];
+      input->read_elem_bilinear(cos_r * u + sin_r * v + m_center_x_pix,
+                                cos_r * v - sin_r * u + m_center_y_pix,
+                                tap);
+      add_v4_v4(sum, tap);
+      /* Step every transform component for the next sample. */
+      tx += m_tx;
+      ty += m_ty;
+      rot += m_rot;
+      scale += m_sc;
+    }
+    mul_v4_v4fl(it.out, sum, weight);
+  }
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
index 5555520462b..9a982bf6481 100644
--- a/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
+++ b/source/blender/compositor/operations/COM_DirectionalBlurOperation.h
@@ -18,12 +18,12 @@
 
 #pragma once
 
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 #include "COM_QualityStepHelper.h"
 
 namespace blender::compositor {
 
-class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper {
+class DirectionalBlurOperation : public MultiThreadedOperation, public QualityStepHelper {
  private:
   SocketReader *m_inputProgram;
   NodeDBlurData *m_data;
@@ -65,6 +65,11 @@ class DirectionalBlurOperation : public NodeOperation, public QualityStepHelper
                      MemoryBuffer **inputMemoryBuffers,
                      std::list<cl_mem> *clMemToCleanUp,
                      std::list<cl_kernel> *clKernelsToCleanUp) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_InpaintOperation.cc b/source/blender/compositor/operations/COM_InpaintOperation.cc
index bfcd504177f..5e76c41752c 100644
--- a/source/blender/compositor/operations/COM_InpaintOperation.cc
+++ b/source/blender/compositor/operations/COM_InpaintOperation.cc
@@ -39,6 +39,7 @@ InpaintSimpleOperation::InpaintSimpleOperation()
   this->m_manhattan_distance = nullptr;
   this->m_cached_buffer = nullptr;
   this->m_cached_buffer_ready = false;
+  flags.is_fullframe_operation = true;
 }
 void InpaintSimpleOperation::initExecution()
 {
@@ -286,4 +287,47 @@ bool InpaintSimpleOperation::determineDependingAreaOfInterest(rcti * /*input*/,
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+/* The requested output area is ignored: the full operation canvas is always required. */
+void InpaintSimpleOperation::get_area_of_interest(
+    const int input_idx, const rcti &UNUSED(output_area), rcti &r_input_area)
+{
+  BLI_assert(input_idx == 0);
+  UNUSED_VARS_NDEBUG(input_idx);
+  r_input_area.xmin = 0;
+  r_input_area.ymin = 0;
+  r_input_area.xmax = getWidth();
+  r_input_area.ymax = getHeight();
+}
+
+/* Full-frame path: fills #m_cached_buffer on the first call, then copies `area` to `output`. */
+void InpaintSimpleOperation::update_memory_buffer(MemoryBuffer *output,
+                                                  const rcti &area,
+                                                  Span<MemoryBuffer *> inputs)
+{
+  /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. */
+  MemoryBuffer *input = inputs[0];
+  if (!m_cached_buffer_ready) {
+    /* Work on a private copy of the input (inflated first when it is a single element). */
+    if (input->is_a_single_elem()) {
+      MemoryBuffer *inflated = input->inflate();
+      m_cached_buffer = inflated->release_ownership_buffer();
+      delete inflated;
+    }
+    else {
+      m_cached_buffer = (float *)MEM_dupallocN(input->getBuffer());
+    }
+    this->calc_manhattan_distance();
+
+    int x, y;
+    for (int curr = 0; this->next_pixel(x, y, curr, this->m_iterations);) {
+      this->pix_step(x, y);
+    }
+    m_cached_buffer_ready = true;
+  }
+
+  const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType());
+  MemoryBuffer buf(m_cached_buffer, num_channels, input->getWidth(), input->getHeight());
+  output->copy_from(&buf, area);
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_InpaintOperation.h b/source/blender/compositor/operations/COM_InpaintOperation.h
index e3d27bf7704..e11610bd263 100644
--- a/source/blender/compositor/operations/COM_InpaintOperation.h
+++ b/source/blender/compositor/operations/COM_InpaintOperation.h
@@ -66,6 +66,13 @@ class InpaintSimpleOperation : public NodeOperation {
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
 
+  void get_area_of_interest(const int input_idx,
+                            const rcti &output_area,
+                            rcti &r_input_area) override;
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
+
  private:
   void calc_manhattan_distance();
   void clamp_xy(int &x, int &y);
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc
index b078d85372d..4153b9c8523 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.cc
+++ b/source/blender/compositor/operations/COM_SMAAOperation.cc
@@ -61,6 +61,8 @@ namespace blender::compositor {
 /*-----------------------------------------------------------------------------*/
 /* Internal Functions to Sample Pixel Color from Image */
 
+/* TODO(manzanilla): to be removed with tiled implementation. Replace it with
+ * #buffer->read_elem_checked. */
 static inline void sample(SocketReader *reader, int x, int y, float color[4])
 {
   if (x < 0 || x >= reader->getWidth() || y < 0 || y >= reader->getHeight()) {
@@ -71,8 +73,13 @@ static inline void sample(SocketReader *reader, int x, int y, float color[4])
   reader->read(color, x, y, nullptr);
 }
 
-static void sample_bilinear_vertical(
-    SocketReader *reader, int x, int y, float yoffset, float color[4])
+static inline void sample(MemoryBuffer *reader, int x, int y, float color[4])
+{
+  reader->read_elem_checked(x, y, color); /* #MemoryBuffer overload of #sample. */
+}
+
+template<typename T>
+static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4])
 {
   float iy = floorf(yoffset);
   float fy = yoffset - iy;
@@ -89,8 +96,8 @@ static void sample_bilinear_vertical(
   color[3] = interpf(color01[3], color00[3], fy);
 }
 
-static void sample_bilinear_horizontal(
-    SocketReader *reader, int x, int y, float xoffset, float color[4])
+template<typename T>
+static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4])
 {
   float ix = floorf(xoffset);
   float fx = xoffset - ix;
@@ -162,7 +169,7 @@ static void area_diag(int d1, int d2, int e1, int e2, float weights[2])
 SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation()
 {
   this->addInputSocket(DataType::Color); /* image */
-  this->addInputSocket(DataType::Value); /* depth, material ID, etc. */
+  this->addInputSocket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. */
   this->addOutputSocket(DataType::Color);
   this->flags.complex = true;
   this->m_imageReader = nullptr;
@@ -207,6 +214,16 @@ bool SMAAEdgeDetectionOperation::determineDependingAreaOfInterest(
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+/* Input area: the output area extended by 2 towards min and by 1 towards max on both axes. */
+void SMAAEdgeDetectionOperation::get_area_of_interest(
+    const int UNUSED(input_idx), const rcti &output_area, rcti &r_input_area)
+{
+  r_input_area.xmin = output_area.xmin - 2;
+  r_input_area.xmax = output_area.xmax + 1;
+  r_input_area.ymin = output_area.ymin - 2;
+  r_input_area.ymax = output_area.ymax + 1;
+}
+
 void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, void * /*data*/)
 {
   float color[4];
@@ -288,6 +305,94 @@ void SMAAEdgeDetectionOperation::executePixel(float output[4], int x, int y, voi
   }
 }
 
+void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                              const rcti &area,
+                                                              Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *image = inputs[0];
+  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
+    float color[4];
+    const int x = it.x;
+    const int y = it.y;
+    /* Output: [0] = left edge flag, [1] = top edge flag, [2] = 0, [3] = 1. */
+    /* Calculate luma deltas against the left and top neighbors: */
+    image->read_elem_checked(x, y, color);
+    const float L = IMB_colormanagement_get_luminance(color);
+    image->read_elem_checked(x - 1, y, color);
+    const float Lleft = IMB_colormanagement_get_luminance(color);
+    image->read_elem_checked(x, y - 1, color);
+    const float Ltop = IMB_colormanagement_get_luminance(color);
+    const float Dleft = fabsf(L - Lleft);
+    const float Dtop = fabsf(L - Ltop);
+
+    /* Threshold the deltas; pixels in the first column/row get no left/top edge: */
+    it.out[0] = (x > 0 && Dleft >= m_threshold) ? 1.0f : 0.0f;
+    it.out[1] = (y > 0 && Dtop >= m_threshold) ? 1.0f : 0.0f;
+    it.out[2] = 0.0f;
+    it.out[3] = 1.0f;
+
+    /* Then discard if there is no edge (the output written above is kept): */
+    if (is_zero_v2(it.out)) {
+      continue;
+    }
+
+    /* Calculate right and bottom deltas: */
+    image->read_elem_checked(x + 1, y, color);
+    const float Lright = IMB_colormanagement_get_luminance(color);
+    image->read_elem_checked(x, y + 1, color);
+    const float Lbottom = IMB_colormanagement_get_luminance(color);
+    const float Dright = fabsf(L - Lright);
+    const float Dbottom = fabsf(L - Lbottom);
+
+    /* Calculate the maximum delta in the direct neighborhood: */
+    float maxDelta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom));
+
+    /* Calculate luma used for both left and top edges: */
+    image->read_elem_checked(x - 1, y - 1, color);
+    const float Llefttop = IMB_colormanagement_get_luminance(color);
+
+    /* Left edge */
+    if (it.out[0] != 0.0f) {
+      /* Calculate deltas around the left pixel: */
+      image->read_elem_checked(x - 2, y, color);
+      const float Lleftleft = IMB_colormanagement_get_luminance(color);
+      image->read_elem_checked(x - 1, y + 1, color);
+      const float Lleftbottom = IMB_colormanagement_get_luminance(color);
+      const float Dleftleft = fabsf(Lleft - Lleftleft);
+      const float Dlefttop = fabsf(Lleft - Llefttop);
+      const float Dleftbottom = fabsf(Lleft - Lleftbottom);
+
+      /* Calculate the final maximum delta: */
+      maxDelta = fmaxf(maxDelta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom)));
+
+      /* Local contrast adaptation: drop the edge if #maxDelta exceeds the scaled left delta. */
+      if (maxDelta > m_contrast_limit * Dleft) {
+        it.out[0] = 0.0f;
+      }
+    }
+
+    /* Top edge */
+    if (it.out[1] != 0.0f) {
+      /* Calculate top-top delta: */
+      image->read_elem_checked(x, y - 2, color);
+      const float Ltoptop = IMB_colormanagement_get_luminance(color);
+      image->read_elem_checked(x + 1, y - 1, color);
+      const float Ltopright = IMB_colormanagement_get_luminance(color);
+      const float Dtoptop = fabsf(Ltop - Ltoptop);
+      const float Dtopleft = fabsf(Ltop - Llefttop);
+      const float Dtopright = fabsf(Ltop - Ltopright);
+
+      /* Calculate the final maximum delta: */
+      maxDelta = fmaxf(maxDelta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright)));
+
+      /* Local contrast adaptation: drop the edge if #maxDelta exceeds the scaled top delta. */
+      if (maxDelta > m_contrast_limit * Dtop) {
+        it.out[1] = 0.0f;
+      }
+    }
+  }
+}
+
 /*-----------------------------------------------------------------------------*/
 /* Blending Weight Calculation (Second Pass) */
 /*-----------------------------------------------------------------------------*/
@@ -309,6 +414,9 @@ void *SMAABlendingWeightCalculationOperation::initializeTileData(rcti *rect)
 void SMAABlendingWeightCalculationOperation::initExecution()
 {
   this->m_imageReader = this->getInputSocketReader(0);
+  if (execution_model_ == eExecutionModel::Tiled) {
+    sample_image_fn_ = [=](int x, int y, float *out) { sample(m_imageReader, x, y, out); };
+  }
 }
 
 void SMAABlendingWeightCalculationOperation::setCornerRounding(float rounding)
@@ -414,6 +522,113 @@ void SMAABlendingWeightCalculationOperation::executePixel(float output[4],
   }
 }
 
+void SMAABlendingWeightCalculationOperation::update_memory_buffer_started(
+    MemoryBuffer *UNUSED(output), const rcti &UNUSED(out_area), Span<MemoryBuffer *> inputs)
+{
+  const MemoryBuffer *edges = inputs[0]; /* Buffer that #sample_image_fn_ will read from. */
+  sample_image_fn_ = [edges](int x, int y, float *out) { edges->read_elem_checked(x, y, out); };
+}
+
+void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial(
+    MemoryBuffer *output, const rcti &out_area, Span<MemoryBuffer *> UNUSED(inputs))
+{
+  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
+    const int x = it.x;
+    const int y = it.y;
+    zero_v4(it.out);
+    /* Weights: out[0..1] come from the north edge, out[2..3] from the west edge. */
+    float edges[4];
+    sample_image_fn_(x, y, edges);
+
+    /* Edge at north */
+    float c[4];
+    if (edges[1] > 0.0f) {
+      /* Diagonals have both north and west edges, so calculating weights for them */
+      /* in one of the boundaries is enough. */
+      calculateDiagWeights(x, y, edges, it.out);
+
+      /* We give priority to diagonals, so if we find a diagonal we skip */
+      /* horizontal/vertical processing. */
+      if (!is_zero_v2(it.out)) {
+        continue;
+      }
+
+      /* Find the distance to the left and the right: */
+      int left = searchXLeft(x, y);
+      int right = searchXRight(x, y);
+      int d1 = x - left, d2 = right - x;
+
+      /* Fetch the left and right crossing edges: */
+      int e1 = 0, e2 = 0;
+      sample_image_fn_(left, y - 1, c);
+      if (c[0] > 0.0) {
+        e1 += 1;
+      }
+      sample_image_fn_(left, y, c);
+      if (c[0] > 0.0) {
+        e1 += 2;
+      }
+      sample_image_fn_(right + 1, y - 1, c);
+      if (c[0] > 0.0) {
+        e2 += 1;
+      }
+      sample_image_fn_(right + 1, y, c);
+      if (c[0] > 0.0) {
+        e2 += 2;
+      }
+
+      /* Ok, we know what this pattern looks like, now it is time for getting */
+      /* the actual area: */
+      area(d1, d2, e1, e2, it.out); /* R, G */
+
+      /* Fix corners: */
+      if (m_corner_rounding) {
+        detectHorizontalCornerPattern(it.out, left, right, y, d1, d2);
+      }
+    }
+
+    /* Edge at west */
+    if (edges[0] > 0.0f) {
+      /* Did we already do diagonal search for this west edge from the left neighboring pixel? */
+      if (isVerticalSearchUnneeded(x, y)) {
+        continue;
+      }
+
+      /* Find the distance to the top and the bottom: */
+      int top = searchYUp(x, y);
+      int bottom = searchYDown(x, y);
+      int d1 = y - top, d2 = bottom - y;
+
+      /* Fetch the top and bottom crossing edges: */
+      int e1 = 0, e2 = 0;
+      sample_image_fn_(x - 1, top, c);
+      if (c[1] > 0.0) {
+        e1 += 1;
+      }
+      sample_image_fn_(x, top, c);
+      if (c[1] > 0.0) {
+        e1 += 2;
+      }
+      sample_image_fn_(x - 1, bottom + 1, c);
+      if (c[1] > 0.0) {
+        e2 += 1;
+      }
+      sample_image_fn_(x, bottom + 1, c);
+      if (c[1] > 0.0) {
+        e2 += 2;
+      }
+
+      /* Get the area for this direction: */
+      area(d1, d2, e1, e2, it.out + 2); /* B, A */
+
+      /* Fix corners: */
+      if (m_corner_rounding) {
+        detectVerticalCornerPattern(it.out + 2, x, top, bottom, d1, d2);
+      }
+    }
+  }
+}
+
 void SMAABlendingWeightCalculationOperation::deinitExecution()
 {
   this->m_imageReader = nullptr;
@@ -434,6 +649,19 @@ bool SMAABlendingWeightCalculationOperation::determineDependingAreaOfInterest(
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                                                  const rcti &output_area,
+                                                                  rcti &r_input_area)
+{
+  /* Pad the output area by the larger of the straight and diagonal search step limits. */
+  const double reach_max = SMAA_MAX_SEARCH_STEPS;
+  const double reach_min = fmax(SMAA_MAX_SEARCH_STEPS - 1, 1);
+  r_input_area.xmin = output_area.xmin - fmax(reach_min, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.xmax = output_area.xmax + fmax(reach_max, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.ymin = output_area.ymin - fmax(reach_min, SMAA_MAX_SEARCH_STEPS_DIAG);
+  r_input_area.ymax = output_area.ymax + fmax(reach_max, SMAA_MAX_SEARCH_STEPS_DIAG);
+}
+
 /*-----------------------------------------------------------------------------*/
 /* Diagonal Search Functions */
 
@@ -449,7 +677,7 @@ int SMAABlendingWeightCalculationOperation::searchDiag1(int x, int y, int dir, b
   while (x != end) {
     x += dir;
     y -= dir;
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[1] == 0.0f) {
       *found = true;
       break;
@@ -472,12 +700,12 @@ int SMAABlendingWeightCalculationOperation::searchDiag2(int x, int y, int dir, b
   while (x != end) {
     x += dir;
     y += dir;
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[1] == 0.0f) {
       *found = true;
       break;
     }
-    sample(m_imageReader, x + 1, y, e);
+    sample_image_fn_(x + 1, y, e);
     if (e[0] == 0.0f) {
       *found = true;
       return (dir > 0) ? x : x - dir;
@@ -522,11 +750,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
       /* Fetch the crossing edges: */
       int left = x - d1, bottom = y + d1;
 
-      sample(m_imageReader, left - 1, bottom, c);
+      sample_image_fn_(left - 1, bottom, c);
       if (c[1] > 0.0) {
         e1 += 2;
       }
-      sample(m_imageReader, left, bottom, c);
+      sample_image_fn_(left, bottom, c);
       if (c[0] > 0.0) {
         e1 += 1;
       }
@@ -536,11 +764,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
       /* Fetch the crossing edges: */
       int right = x + d2, top = y - d2;
 
-      sample(m_imageReader, right + 1, top, c);
+      sample_image_fn_(right + 1, top, c);
       if (c[1] > 0.0) {
         e2 += 2;
       }
-      sample(m_imageReader, right + 1, top - 1, c);
+      sample_image_fn_(right + 1, top - 1, c);
       if (c[0] > 0.0) {
         e2 += 1;
       }
@@ -552,7 +780,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
 
   /* Search for the line ends: */
   d1 = x - searchDiag2(x, y, -1, &d1_found);
-  sample(m_imageReader, x + 1, y, e);
+  sample_image_fn_(x + 1, y, e);
   if (e[0] > 0.0f) {
     d2 = searchDiag2(x, y, 1, &d2_found) - x;
   }
@@ -568,11 +796,11 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
       /* Fetch the crossing edges: */
       int left = x - d1, top = y - d1;
 
-      sample(m_imageReader, left - 1, top, c);
+      sample_image_fn_(left - 1, top, c);
       if (c[1] > 0.0) {
         e1 += 2;
       }
-      sample(m_imageReader, left, top - 1, c);
+      sample_image_fn_(left, top - 1, c);
       if (c[0] > 0.0) {
         e1 += 1;
       }
@@ -582,7 +810,7 @@ void SMAABlendingWeightCalculationOperation::calculateDiagWeights(int x,
       /* Fetch the crossing edges: */
       int right = x + d2, bottom = y + d2;
 
-      sample(m_imageReader, right + 1, bottom, c);
+      sample_image_fn_(right + 1, bottom, c);
       if (c[1] > 0.0) {
         e2 += 2;
       }
@@ -610,7 +838,7 @@ bool SMAABlendingWeightCalculationOperation::isVerticalSearchUnneeded(int x, int
   }
 
   /* Search for the line ends: */
-  sample(m_imageReader, x - 1, y, e);
+  sample_image_fn_(x - 1, y, e);
   if (e[1] > 0.0f) {
     d1 = x - searchDiag2(x - 1, y, -1, &found);
   }
@@ -631,14 +859,14 @@ int SMAABlendingWeightCalculationOperation::searchXLeft(int x, int y)
   float e[4];
 
   while (x > end) {
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[1] == 0.0f) { /* Is the edge not activated? */
       break;
     }
     if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       return x;
     }
-    sample(m_imageReader, x, y - 1, e);
+    sample_image_fn_(x, y - 1, e);
     if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       return x;
     }
@@ -655,12 +883,12 @@ int SMAABlendingWeightCalculationOperation::searchXRight(int x, int y)
 
   while (x < end) {
     x++;
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[1] == 0.0f || /* Is the edge not activated? */
         e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       break;
     }
-    sample(m_imageReader, x, y - 1, e);
+    sample_image_fn_(x, y - 1, e);
     if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       break;
     }
@@ -675,14 +903,14 @@ int SMAABlendingWeightCalculationOperation::searchYUp(int x, int y)
   float e[4];
 
   while (y > end) {
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[0] == 0.0f) { /* Is the edge not activated? */
       break;
     }
     if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       return y;
     }
-    sample(m_imageReader, x - 1, y, e);
+    sample_image_fn_(x - 1, y, e);
     if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       return y;
     }
@@ -699,12 +927,12 @@ int SMAABlendingWeightCalculationOperation::searchYDown(int x, int y)
 
   while (y < end) {
     y++;
-    sample(m_imageReader, x, y, e);
+    sample_image_fn_(x, y, e);
     if (e[0] == 0.0f || /* Is the edge not activated? */
         e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       break;
     }
-    sample(m_imageReader, x - 1, y, e);
+    sample_image_fn_(x - 1, y, e);
     if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
       break;
     }
@@ -728,16 +956,16 @@ void SMAABlendingWeightCalculationOperation::detectHorizontalCornerPattern(
 
   /* Near the left corner */
   if (d1 <= d2) {
-    sample(m_imageReader, left, y + 1, e);
+    sample_image_fn_(left, y + 1, e);
     factor[0] -= rounding * e[0];
-    sample(m_imageReader, left, y - 2, e);
+    sample_image_fn_(left, y - 2, e);
     factor[1] -= rounding * e[0];
   }
   /* Near the right corner */
   if (d1 >= d2) {
-    sample(m_imageReader, right + 1, y + 1, e);
+    sample_image_fn_(right + 1, y + 1, e);
     factor[0] -= rounding * e[0];
-    sample(m_imageReader, right + 1, y - 2, e);
+    sample_image_fn_(right + 1, y - 2, e);
     factor[1] -= rounding * e[0];
   }
 
@@ -757,16 +985,16 @@ void SMAABlendingWeightCalculationOperation::detectVerticalCornerPattern(
 
   /* Near the top corner */
   if (d1 <= d2) {
-    sample(m_imageReader, x + 1, top, e);
+    sample_image_fn_(x + 1, top, e);
     factor[0] -= rounding * e[1];
-    sample(m_imageReader, x - 2, top, e);
+    sample_image_fn_(x - 2, top, e);
     factor[1] -= rounding * e[1];
   }
   /* Near the bottom corner */
   if (d1 >= d2) {
-    sample(m_imageReader, x + 1, bottom + 1, e);
+    sample_image_fn_(x + 1, bottom + 1, e);
     factor[0] -= rounding * e[1];
-    sample(m_imageReader, x - 2, bottom + 1, e);
+    sample_image_fn_(x - 2, bottom + 1, e);
     factor[1] -= rounding * e[1];
   }
 
@@ -847,6 +1075,59 @@ void SMAANeighborhoodBlendingOperation::executePixel(float output[4],
   madd_v4_v4fl(output, color2, weight2);
 }
 
+void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                                     const rcti &out_area,
+                                                                     Span<MemoryBuffer *> inputs)
+{
+  /* Input 0 is the image that gets sampled, input 1 supplies the blending weights. */
+  MemoryBuffer *image = inputs[0];
+  MemoryBuffer *weights = inputs[1];
+  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
+    const float x = it.x;
+    const float y = it.y;
+
+    /* Fetch four weights: two at this pixel, one at the next column and one at the next row. */
+    float texel[4];
+    weights->read_elem_checked(x, y, texel);
+    const float w_top = texel[0];
+    const float w_left = texel[2];
+    weights->read_elem_checked(x + 1, y, texel);
+    const float w_right = texel[3];
+    weights->read_elem_checked(x, y + 1, texel);
+    const float w_bottom = texel[1];
+
+    /* With (almost) no weight at all there is nothing to blend: pass the pixel through. */
+    if (w_right + w_bottom + w_left + w_top < 1e-5f) {
+      image->read_elem_checked(x, y, it.out);
+      continue;
+    }
+
+    /* Blend along the axis with the largest weight; `fwd` is right/bottom, `back` left/top. */
+    void (*sample_fn)(MemoryBuffer *, int, int, float, float[4]);
+    float w_fwd, w_back;
+    if (!(fmaxf(w_right, w_left) > fmaxf(w_bottom, w_top))) {
+      sample_fn = sample_bilinear_vertical;
+      w_fwd = w_bottom;
+      w_back = w_top;
+    }
+    else { /* `max(horizontal) > max(vertical)` */
+      sample_fn = sample_bilinear_horizontal;
+      w_fwd = w_right;
+      w_back = w_left;
+    }
+
+    /* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */
+    float color_fwd[4];
+    float color_back[4];
+    sample_fn(image, x, y, w_fwd, color_fwd);
+    sample_fn(image, x, y, -w_back, color_back);
+
+    /* Each sample contributes in proportion to its share of the two weights. */
+    mul_v4_v4fl(it.out, color_fwd, w_fwd / (w_fwd + w_back));
+    madd_v4_v4fl(it.out, color_back, w_back / (w_fwd + w_back));
+  }
+}
+
 void SMAANeighborhoodBlendingOperation::deinitExecution()
 {
   this->m_image1Reader = nullptr;
@@ -866,4 +1147,12 @@ bool SMAANeighborhoodBlendingOperation::determineDependingAreaOfInterest(
   return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
 }
 
+void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                                             const rcti &output_area,
+                                                             rcti &r_input_area)
+{
+  r_input_area = output_area; /* Same starting area whichever input is asked for. */
+  expand_area_for_sampler(r_input_area, PixelSampler::Bilinear); /* Bilinear read margin. */
+}
+
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h
index 781762202b4..91b9299ee43 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.h
+++ b/source/blender/compositor/operations/COM_SMAAOperation.h
@@ -20,14 +20,14 @@
 
 #pragma once
 
-#include "COM_NodeOperation.h"
+#include "COM_MultiThreadedOperation.h"
 
 namespace blender::compositor {
 
 /*-----------------------------------------------------------------------------*/
 /* Edge Detection (First Pass) */
 
-class SMAAEdgeDetectionOperation : public NodeOperation {
+class SMAAEdgeDetectionOperation : public MultiThreadedOperation {
  protected:
   SocketReader *m_imageReader;
   SocketReader *m_valueReader;
@@ -60,15 +60,20 @@ class SMAAEdgeDetectionOperation : public NodeOperation {
   bool determineDependingAreaOfInterest(rcti *input,
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 /*-----------------------------------------------------------------------------*/
 /*  Blending Weight Calculation (Second Pass) */
 
-class SMAABlendingWeightCalculationOperation : public NodeOperation {
+class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation {
  private:
   SocketReader *m_imageReader;
-
+  std::function<void(int x, int y, float *out)> sample_image_fn_;
   int m_corner_rounding;
 
  public:
@@ -96,6 +101,14 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation {
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
 
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_started(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
+
  private:
   /* Diagonal Search Functions */
   int searchDiag1(int x, int y, int dir, bool *found);
@@ -117,7 +130,7 @@ class SMAABlendingWeightCalculationOperation : public NodeOperation {
 /*-----------------------------------------------------------------------------*/
 /* Neighborhood Blending (Third Pass) */
 
-class SMAANeighborhoodBlendingOperation : public NodeOperation {
+class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation {
  private:
   SocketReader *m_image1Reader;
   SocketReader *m_image2Reader;
@@ -144,6 +157,11 @@ class SMAANeighborhoodBlendingOperation : public NodeOperation {
   bool determineDependingAreaOfInterest(rcti *input,
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
+
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+  void update_memory_buffer_partial(MemoryBuffer *output,
+                                    const rcti &area,
+                                    Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.cc b/source/blender/compositor/operations/COM_VectorBlurOperation.cc
index df65044afc1..5405e6d424a 100644
--- a/source/blender/compositor/operations/COM_VectorBlurOperation.cc
+++ b/source/blender/compositor/operations/COM_VectorBlurOperation.cc
@@ -57,6 +57,7 @@ VectorBlurOperation::VectorBlurOperation()
   this->m_inputSpeedProgram = nullptr;
   this->m_inputZProgram = nullptr;
   flags.complex = true;
+  flags.is_fullframe_operation = true;
 }
 void VectorBlurOperation::initExecution()
 {
@@ -121,6 +122,51 @@ bool VectorBlurOperation::determineDependingAreaOfInterest(rcti * /*input*/,
   return false;
 }
 
+void VectorBlurOperation::get_area_of_interest(const int UNUSED(input_idx),
+                                               const rcti &UNUSED(output_area),
+                                               rcti &r_input_area)
+{
+  /* Always request the operation's full canvas, whatever the output area or input index. */
+  r_input_area.xmin = r_input_area.ymin = 0;
+  r_input_area.xmax = getWidth();
+  r_input_area.ymax = getHeight();
+}
+
+void VectorBlurOperation::update_memory_buffer(MemoryBuffer *output,
+                                               const rcti &area,
+                                               Span<MemoryBuffer *> inputs)
+{
+  /* TODO(manzanilla): once tiled implementation is removed, run multi-threaded where possible. */
+  if (m_cachedInstance == nullptr) {
+    /* Image and Z are used as given unless single-element, then an inflated copy is owned. */
+    MemoryBuffer *const image_in = inputs[IMAGE_INPUT_INDEX];
+    MemoryBuffer *const z_in = inputs[Z_INPUT_INDEX];
+    const bool owns_image = image_in->is_a_single_elem();
+    const bool owns_z = z_in->is_a_single_elem();
+    MemoryBuffer *image = owns_image ? image_in->inflate() : image_in;
+    MemoryBuffer *z = owns_z ? z_in->inflate() : z_in;
+
+    /* Speed is always a private buffer because #generateVectorBlur modifies it. */
+    MemoryBuffer *const speed_in = inputs[SPEED_INPUT_INDEX];
+    MemoryBuffer *speed = speed_in->is_a_single_elem() ? speed_in->inflate() :
+                                                         new MemoryBuffer(*speed_in);
+
+    m_cachedInstance = (float *)MEM_dupallocN(image->getBuffer());
+    this->generateVectorBlur(m_cachedInstance, image, speed, z);
+    delete speed;
+    if (owns_image) {
+      delete image;
+    }
+    if (owns_z) {
+      delete z;
+    }
+  }
+
+  const int num_channels = COM_data_type_num_channels(getOutputSocket()->getDataType());
+  MemoryBuffer buf(m_cachedInstance, num_channels, getWidth(), getHeight());
+  output->copy_from(&buf, area);
+}
+
 void VectorBlurOperation::generateVectorBlur(float *data,
                                              MemoryBuffer *inputImage,
                                              MemoryBuffer *inputSpeed,
diff --git a/source/blender/compositor/operations/COM_VectorBlurOperation.h b/source/blender/compositor/operations/COM_VectorBlurOperation.h
index dfcf1fb16f7..c30c150db3c 100644
--- a/source/blender/compositor/operations/COM_VectorBlurOperation.h
+++ b/source/blender/compositor/operations/COM_VectorBlurOperation.h
@@ -26,6 +26,10 @@ namespace blender::compositor {
 
 class VectorBlurOperation : public NodeOperation, public QualityStepHelper {
  private:
+  static constexpr int IMAGE_INPUT_INDEX = 0;
+  static constexpr int Z_INPUT_INDEX = 1;
+  static constexpr int SPEED_INPUT_INDEX = 2;
+
   /**
    * \brief Cached reference to the inputProgram
    */
@@ -68,6 +72,13 @@ class VectorBlurOperation : public NodeOperation, public QualityStepHelper {
                                         ReadBufferOperation *readOperation,
                                         rcti *output) override;
 
+  void get_area_of_interest(const int input_idx,
+                            const rcti &output_area,
+                            rcti &r_input_area) override;
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
+
  protected:
   void generateVectorBlur(float *data,
                           MemoryBuffer *inputImage,
-- 
cgit v1.2.3