1 files changed, 154 insertions, 0 deletions
diff --git a/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc
new file mode 100644
index 00000000000..959f599fab4
--- /dev/null
+++ b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc
@@ -0,0 +1,154 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2021, Blender Foundation.
+ */
+
+#include "COM_GaussianBlurBaseOperation.h"
+
+namespace blender::compositor {
+
+GaussianBlurBaseOperation::GaussianBlurBaseOperation(eDimension dim)
+    : BlurBaseOperation(DataType::Color)
+{
+  dimension_ = dim;     /* Axis the blur is applied along. */
+  rad_ = 0.0f;          /* Blur radius; set by init_data() for the full-frame model. */
+  m_filtersize = 0;     /* Filter reach on each side of a pixel; also set by init_data(). */
+  m_gausstab = nullptr; /* Weight table; allocated by initExecution(). */
+#ifdef BLI_HAVE_SSE2
+  m_gausstab_sse = nullptr;
+#endif
+}
+
+void GaussianBlurBaseOperation::init_data()
+{
+  BlurBaseOperation::init_data();
+  if (execution_model_ == eExecutionModel::FullFrame) {
+    /* Clamp the scaled blur size to [0, MAX_GAUSSTAB_RADIUS]; the filter size is its ceiling. */
+    rad_ = min_ff(max_ff(m_size * get_blur_size(dimension_), 0.0f), MAX_GAUSSTAB_RADIUS);
+    m_filtersize = min_ii(ceil(rad_), MAX_GAUSSTAB_RADIUS);
+  }
+}
+
+void GaussianBlurBaseOperation::initExecution()
+{
+  BlurBaseOperation::initExecution();
+  if (execution_model_ == eExecutionModel::FullFrame) { /* Full-frame model only. */
+    m_gausstab = BlurBaseOperation::make_gausstab(rad_, m_filtersize);
+#ifdef BLI_HAVE_SSE2
+    m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(m_gausstab, m_filtersize);
+#endif
+  } /* Both tables are released again in deinitExecution(). */
+}
+
+void GaussianBlurBaseOperation::deinitExecution()
+{
+  BlurBaseOperation::deinitExecution();
+  /* Free whichever weight tables were built; resetting them makes a repeated call a no-op. */
+  if (m_gausstab != nullptr) {
+    MEM_freeN(m_gausstab);
+    m_gausstab = nullptr;
+  }
+#ifdef BLI_HAVE_SSE2
+  if (m_gausstab_sse != nullptr) {
+    MEM_freeN(m_gausstab_sse);
+    m_gausstab_sse = nullptr;
+  }
+#endif
+}
+
+void GaussianBlurBaseOperation::get_area_of_interest(const int input_idx,
+                                                     const rcti &output_area,
+                                                     rcti &r_input_area)
+{
+  if (input_idx != IMAGE_INPUT_INDEX) {
+    BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area);
+    return; /* Inputs other than the image are handled by the base class. */
+  }
+  const int margin = m_filtersize + 1; /* Extra input needed on each side of the blur axis. */
+  r_input_area = output_area;
+  switch (dimension_) {
+    case eDimension::X:
+      r_input_area.xmin -= margin;
+      r_input_area.xmax += margin;
+      break;
+    case eDimension::Y:
+      r_input_area.ymin -= margin;
+      r_input_area.ymax += margin;
+      break;
+  }
+}
+
+/* Gaussian blur of `area` along `dimension_`: every output pixel becomes the weighted sum of the
+ * image-input pixels within `m_filtersize` of it on the blur axis (clipped to the input rect),
+ * divided by the sum of the weights used. Needs the tables built by initExecution().
+ *
+ * \param output: Buffer the blurred pixels are written to.
+ * \param area: Region that is iterated and written.
+ * \param inputs: Input buffers; only the one at `IMAGE_INPUT_INDEX` is read. */
+void GaussianBlurBaseOperation::update_memory_buffer_partial(MemoryBuffer *output,
+                                                             const rcti &area,
+                                                             Span<MemoryBuffer *> inputs)
+{
+  MemoryBuffer *input = inputs[IMAGE_INPUT_INDEX];
+  const rcti &input_rect = input->get_rect();
+  BuffersIterator<float> it = output->iterate_with({input}, area);
+
+  /* `dimension_` is either X or Y; resolve the matching bounds and stride before the loop. */
+  const bool is_x_axis = (dimension_ == eDimension::X);
+  const int min_input_coord = is_x_axis ? input_rect.xmin : input_rect.ymin;
+  const int max_input_coord = is_x_axis ? input_rect.xmax : input_rect.ymax;
+  const int elem_stride = is_x_axis ? input->elem_stride : input->row_stride;
+  /* Loop invariants: the quality step and the float offset between two sampled input pixels. */
+  const int step = QualityStepHelper::getStep();
+  const int in_stride = elem_stride * step;
+
+  for (; !it.is_end(); ++it) {
+    /* Filter window [coord_min, coord_max) around the current pixel, clipped to the input. */
+    const int coord = is_x_axis ? it.x : it.y;
+    const int coord_min = max_ii(coord - m_filtersize, min_input_coord);
+    const int coord_max = min_ii(coord + m_filtersize + 1, max_input_coord);
+
+    /* Running weighted color sum and the sum of the weights applied so far. */
+    float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+    float multiplier_accum = 0.0f;
+
+    /* First tap of the window and its entry in the weight table (center is `m_filtersize`). */
+    const float *in = it.in(0) + ((intptr_t)coord_min - coord) * elem_stride;
+    int gauss_idx = (coord_min - coord) + m_filtersize;
+    const int gauss_end = gauss_idx + (coord_max - coord_min);
+#ifdef BLI_HAVE_SSE2
+    /* NOTE(review): `_mm_load_ps` requires 16-byte alignment; assumes `in` satisfies it. */
+    __m128 accum_r = _mm_load_ps(color_accum);
+    for (; gauss_idx < gauss_end; in += in_stride, gauss_idx += step) {
+      __m128 reg_a = _mm_load_ps(in);
+      reg_a = _mm_mul_ps(reg_a, m_gausstab_sse[gauss_idx]);
+      accum_r = _mm_add_ps(accum_r, reg_a);
+      multiplier_accum += m_gausstab[gauss_idx];
+    }
+    _mm_store_ps(color_accum, accum_r);
+#else
+    for (; gauss_idx < gauss_end; in += in_stride, gauss_idx += step) {
+      const float multiplier = m_gausstab[gauss_idx];
+      madd_v4_v4fl(color_accum, in, multiplier);
+      multiplier_accum += multiplier;
+    }
+#endif
+    /* Normalize by the weights actually accumulated (fewer when clipped or stepping). */
+    mul_v4_v4fl(it.out, color_accum, 1.0f / multiplier_accum);
+  }
+}
+
+}  // namespace blender::compositor