diff options
author | Manuel Castilla <manzanillawork@gmail.com> | 2021-08-08 15:50:20 +0300 |
---|---|---|
committer | Manuel Castilla <manzanillawork@gmail.com> | 2021-08-08 18:11:54 +0300 |
commit | 723c45a8e60077b570e20d6b1b774c83d82a8e70 (patch) | |
tree | 538b7c58e088d4404c84300dbad7b57a8f633a41 /source/blender/compositor/operations | |
parent | 80484794165c8a0a62dcb6309c111dede6f0c9be (diff) |
Compositor: Full frame Bokeh Blur and Blur nodes
Diffstat (limited to 'source/blender/compositor/operations')
24 files changed, 1309 insertions, 174 deletions
diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.cc b/source/blender/compositor/operations/COM_BlurBaseOperation.cc index 8b73624ca79..a1075e9feb9 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.cc +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.cc @@ -17,6 +17,8 @@ */ #include "COM_BlurBaseOperation.h" +#include "COM_ConstantOperation.h" + #include "BLI_math.h" #include "MEM_guardedalloc.h" @@ -36,11 +38,15 @@ BlurBaseOperation::BlurBaseOperation(DataType data_type) this->m_size = 1.0f; this->m_sizeavailable = false; this->m_extend_bounds = false; + use_variable_size_ = false; } -void BlurBaseOperation::initExecution() + +void BlurBaseOperation::init_data() { - this->m_inputProgram = this->getInputSocketReader(0); - this->m_inputSize = this->getInputSocketReader(1); + if (execution_model_ == eExecutionModel::FullFrame) { + updateSize(); + } + this->m_data.image_in_width = this->getWidth(); this->m_data.image_in_height = this->getHeight(); if (this->m_data.relative) { @@ -61,6 +67,12 @@ void BlurBaseOperation::initExecution() this->m_data.sizex = round_fl_to_int(this->m_data.percentx * 0.01f * sizex); this->m_data.sizey = round_fl_to_int(this->m_data.percenty * 0.01f * sizey); } +} + +void BlurBaseOperation::initExecution() +{ + this->m_inputProgram = this->getInputSocketReader(0); + this->m_inputSize = this->getInputSocketReader(1); QualityStepHelper::initExecution(COM_QH_MULTIPLY); } @@ -165,23 +177,87 @@ void BlurBaseOperation::setData(const NodeBlurData *data) memcpy(&m_data, data, sizeof(NodeBlurData)); } +int BlurBaseOperation::get_blur_size(eDimension dim) const +{ + switch (dim) { + case eDimension::X: + return m_data.sizex; + case eDimension::Y: + return m_data.sizey; + } + return -1; +} + void BlurBaseOperation::updateSize() { - if (!this->m_sizeavailable) { - float result[4]; - this->getInputSocketReader(1)->readSampled(result, 0, 0, PixelSampler::Nearest); - this->m_size = result[0]; - 
this->m_sizeavailable = true; + if (this->m_sizeavailable || use_variable_size_) { + return; + } + + switch (execution_model_) { + case eExecutionModel::Tiled: { + float result[4]; + this->getInputSocketReader(1)->readSampled(result, 0, 0, PixelSampler::Nearest); + this->m_size = result[0]; + break; + } + case eExecutionModel::FullFrame: { + NodeOperation *size_input = get_input_operation(SIZE_INPUT_INDEX); + if (size_input->get_flags().is_constant_operation) { + m_size = *static_cast<ConstantOperation *>(size_input)->get_constant_elem(); + } /* Else use default. */ + break; + } } + this->m_sizeavailable = true; +} + +static int round_to_even(float value) +{ + return ceilf(value * 0.5f) * 2.0f; } void BlurBaseOperation::determineResolution(unsigned int resolution[2], unsigned int preferredResolution[2]) { - NodeOperation::determineResolution(resolution, preferredResolution); - if (this->m_extend_bounds) { - resolution[0] += 2 * this->m_size * m_data.sizex; - resolution[1] += 2 * this->m_size * m_data.sizey; + if (!m_extend_bounds) { + NodeOperation::determineResolution(resolution, preferredResolution); + return; + } + + switch (execution_model_) { + case eExecutionModel::Tiled: { + NodeOperation::determineResolution(resolution, preferredResolution); + resolution[0] += 2 * m_size * m_data.sizex; + resolution[1] += 2 * m_size * m_data.sizey; + break; + } + case eExecutionModel::FullFrame: { + /* Setting a modifier ensures all non main inputs have extended bounds as preferred + * resolution, avoiding unnecessary resolution convertions that would hide constant + * operations. */ + set_determined_resolution_modifier([=](unsigned int res[2]) { + /* Rounding to even prevents jiggling in backdrop while switching size values. 
*/ + res[0] += round_to_even(2 * m_size * m_data.sizex); + res[1] += round_to_even(2 * m_size * m_data.sizey); + }); + NodeOperation::determineResolution(resolution, preferredResolution); + break; + } + } +} + +void BlurBaseOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case 0: + r_input_area = output_area; + break; + case 1: + r_input_area = use_variable_size_ ? output_area : COM_SINGLE_ELEM_AREA; + break; } } diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.h b/source/blender/compositor/operations/COM_BlurBaseOperation.h index 7937ebd69dc..78b1e919aa6 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.h +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.h @@ -18,7 +18,7 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" #include "COM_QualityStepHelper.h" #define MAX_GAUSSTAB_RADIUS 30000 @@ -27,10 +27,16 @@ namespace blender::compositor { -class BlurBaseOperation : public NodeOperation, public QualityStepHelper { +class BlurBaseOperation : public MultiThreadedOperation, public QualityStepHelper { private: + bool m_extend_bounds; + + protected: + static constexpr int IMAGE_INPUT_INDEX = 0; + static constexpr int SIZE_INPUT_INDEX = 1; + protected: - BlurBaseOperation(DataType data_type); + BlurBaseOperation(DataType data_type8); float *make_gausstab(float rad, int size); #ifdef BLI_HAVE_SSE2 __m128 *convert_gausstab_sse(const float *gausstab, int size); @@ -49,9 +55,11 @@ class BlurBaseOperation : public NodeOperation, public QualityStepHelper { float m_size; bool m_sizeavailable; - bool m_extend_bounds; + /* Flags for inheriting classes. 
*/ + bool use_variable_size_; public: + virtual void init_data() override; /** * Initialize the execution */ @@ -75,8 +83,14 @@ class BlurBaseOperation : public NodeOperation, public QualityStepHelper { this->m_extend_bounds = extend_bounds; } + int get_blur_size(eDimension dim) const; + void determineResolution(unsigned int resolution[2], unsigned int preferredResolution[2]) override; + + virtual void get_area_of_interest(int input_idx, + const rcti &output_area, + rcti &r_input_area) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_BokehBlurOperation.cc b/source/blender/compositor/operations/COM_BokehBlurOperation.cc index 3f98732b403..a0172fabde4 100644 --- a/source/blender/compositor/operations/COM_BokehBlurOperation.cc +++ b/source/blender/compositor/operations/COM_BokehBlurOperation.cc @@ -17,6 +17,8 @@ */ #include "COM_BokehBlurOperation.h" +#include "COM_ConstantOperation.h" + #include "BLI_math.h" #include "COM_OpenCLDevice.h" @@ -24,6 +26,11 @@ namespace blender::compositor { +constexpr int IMAGE_INPUT_INDEX = 0; +constexpr int BOKEH_INPUT_INDEX = 1; +constexpr int BOUNDING_BOX_INPUT_INDEX = 2; +constexpr int SIZE_INPUT_INDEX = 3; + BokehBlurOperation::BokehBlurOperation() { this->addInputSocket(DataType::Color); @@ -44,6 +51,23 @@ BokehBlurOperation::BokehBlurOperation() this->m_extend_bounds = false; } +void BokehBlurOperation::init_data() +{ + if (execution_model_ == eExecutionModel::FullFrame) { + updateSize(); + } + + NodeOperation *bokeh = get_input_operation(BOKEH_INPUT_INDEX); + const int width = bokeh->getWidth(); + const int height = bokeh->getHeight(); + + const float dimension = MIN2(width, height); + + m_bokehMidX = width / 2.0f; + m_bokehMidY = height / 2.0f; + m_bokehDimension = dimension / 2.0f; +} + void *BokehBlurOperation::initializeTileData(rcti * /*rect*/) { lockMutex(); @@ -58,18 +82,11 @@ void *BokehBlurOperation::initializeTileData(rcti * /*rect*/) void 
BokehBlurOperation::initExecution() { initMutex(); + this->m_inputProgram = getInputSocketReader(0); this->m_inputBokehProgram = getInputSocketReader(1); this->m_inputBoundingBoxReader = getInputSocketReader(2); - int width = this->m_inputBokehProgram->getWidth(); - int height = this->m_inputBokehProgram->getHeight(); - - float dimension = MIN2(width, height); - - this->m_bokehMidX = width / 2.0f; - this->m_bokehMidY = height / 2.0f; - this->m_bokehDimension = dimension / 2.0f; QualityStepHelper::initExecution(COM_QH_INCREASE); } @@ -225,23 +242,151 @@ void BokehBlurOperation::executeOpenCL(OpenCLDevice *device, void BokehBlurOperation::updateSize() { - if (!this->m_sizeavailable) { - float result[4]; - this->getInputSocketReader(3)->readSampled(result, 0, 0, PixelSampler::Nearest); - this->m_size = result[0]; - CLAMP(this->m_size, 0.0f, 10.0f); - this->m_sizeavailable = true; + if (this->m_sizeavailable) { + return; + } + + switch (execution_model_) { + case eExecutionModel::Tiled: { + float result[4]; + this->getInputSocketReader(3)->readSampled(result, 0, 0, PixelSampler::Nearest); + this->m_size = result[0]; + CLAMP(this->m_size, 0.0f, 10.0f); + break; + } + case eExecutionModel::FullFrame: { + NodeOperation *size_input = get_input_operation(SIZE_INPUT_INDEX); + if (size_input->get_flags().is_constant_operation) { + m_size = *static_cast<ConstantOperation *>(size_input)->get_constant_elem(); + CLAMP(m_size, 0.0f, 10.0f); + } /* Else use default. 
*/ + break; + } } + this->m_sizeavailable = true; +} + +static float round_to_even(float value) +{ + return roundf(value * 0.5f) * 2.0f; } void BokehBlurOperation::determineResolution(unsigned int resolution[2], unsigned int preferredResolution[2]) { - NodeOperation::determineResolution(resolution, preferredResolution); - if (this->m_extend_bounds) { - const float max_dim = MAX2(resolution[0], resolution[1]); - resolution[0] += 2 * this->m_size * max_dim / 100.0f; - resolution[1] += 2 * this->m_size * max_dim / 100.0f; + if (!m_extend_bounds) { + NodeOperation::determineResolution(resolution, preferredResolution); + return; + } + + switch (execution_model_) { + case eExecutionModel::Tiled: { + NodeOperation::determineResolution(resolution, preferredResolution); + const float max_dim = MAX2(resolution[0], resolution[1]); + resolution[0] += 2 * this->m_size * max_dim / 100.0f; + resolution[1] += 2 * this->m_size * max_dim / 100.0f; + break; + } + case eExecutionModel::FullFrame: { + set_determined_resolution_modifier([=](unsigned int res[2]) { + const float max_dim = MAX2(res[0], res[1]); + /* Rounding to even prevents image jiggling in backdrop while switching size values. 
*/ + float add_size = round_to_even(2 * this->m_size * max_dim / 100.0f); + res[0] += add_size; + res[1] += add_size; + }); + NodeOperation::determineResolution(resolution, preferredResolution); + break; + } + } +} + +void BokehBlurOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case IMAGE_INPUT_INDEX: { + const float max_dim = MAX2(this->getWidth(), this->getHeight()); + const float add_size = m_size * max_dim / 100.0f; + r_input_area.xmin = output_area.xmin - add_size; + r_input_area.xmax = output_area.xmax + add_size; + r_input_area.ymin = output_area.ymin - add_size; + r_input_area.ymax = output_area.ymax + add_size; + break; + } + case BOKEH_INPUT_INDEX: { + NodeOperation *bokeh_input = getInputOperation(BOKEH_INPUT_INDEX); + r_input_area.xmin = 0; + r_input_area.xmax = bokeh_input->getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = bokeh_input->getHeight(); + break; + } + case BOUNDING_BOX_INPUT_INDEX: + r_input_area = output_area; + break; + case SIZE_INPUT_INDEX: { + r_input_area = COM_SINGLE_ELEM_AREA; + break; + } + } +} + +void BokehBlurOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + const float max_dim = MAX2(this->getWidth(), this->getHeight()); + const int pixel_size = m_size * max_dim / 100.0f; + const float m = m_bokehDimension / pixel_size; + + const MemoryBuffer *image_input = inputs[IMAGE_INPUT_INDEX]; + const MemoryBuffer *bokeh_input = inputs[BOKEH_INPUT_INDEX]; + MemoryBuffer *bounding_input = inputs[BOUNDING_BOX_INPUT_INDEX]; + BuffersIterator<float> it = output->iterate_with({bounding_input}, area); + const rcti &image_rect = image_input->get_rect(); + for (; !it.is_end(); ++it) { + const int x = it.x; + const int y = it.y; + const float bounding_box = *it.in(0); + if (bounding_box <= 0.0f) { + image_input->read_elem(x, y, it.out); + continue; + } + + float color_accum[4] = {0}; + 
float multiplier_accum[4] = {0}; + if (pixel_size < 2) { + image_input->read_elem(x, y, color_accum); + multiplier_accum[0] = 1.0f; + multiplier_accum[1] = 1.0f; + multiplier_accum[2] = 1.0f; + multiplier_accum[3] = 1.0f; + } + const int miny = MAX2(y - pixel_size, image_rect.ymin); + const int maxy = MIN2(y + pixel_size, image_rect.ymax); + const int minx = MAX2(x - pixel_size, image_rect.xmin); + const int maxx = MIN2(x + pixel_size, image_rect.xmax); + const int step = getStep(); + const int elem_stride = image_input->elem_stride * step; + const int row_stride = image_input->row_stride * step; + const float *row_color = image_input->get_elem(minx, miny); + for (int ny = miny; ny < maxy; ny += step, row_color += row_stride) { + const float *color = row_color; + const float v = m_bokehMidY - (ny - y) * m; + for (int nx = minx; nx < maxx; nx += step, color += elem_stride) { + const float u = m_bokehMidX - (nx - x) * m; + float bokeh[4]; + bokeh_input->read_elem_checked(u, v, bokeh); + madd_v4_v4v4(color_accum, bokeh, color); + add_v4_v4(multiplier_accum, bokeh); + } + } + it.out[0] = color_accum[0] * (1.0f / multiplier_accum[0]); + it.out[1] = color_accum[1] * (1.0f / multiplier_accum[1]); + it.out[2] = color_accum[2] * (1.0f / multiplier_accum[2]); + it.out[3] = color_accum[3] * (1.0f / multiplier_accum[3]); } } diff --git a/source/blender/compositor/operations/COM_BokehBlurOperation.h b/source/blender/compositor/operations/COM_BokehBlurOperation.h index 3ce06adb5d6..59c14305393 100644 --- a/source/blender/compositor/operations/COM_BokehBlurOperation.h +++ b/source/blender/compositor/operations/COM_BokehBlurOperation.h @@ -18,12 +18,12 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" #include "COM_QualityStepHelper.h" namespace blender::compositor { -class BokehBlurOperation : public NodeOperation, public QualityStepHelper { +class BokehBlurOperation : public MultiThreadedOperation, public QualityStepHelper { private: 
SocketReader *m_inputProgram; SocketReader *m_inputBokehProgram; @@ -31,6 +31,7 @@ class BokehBlurOperation : public NodeOperation, public QualityStepHelper { void updateSize(); float m_size; bool m_sizeavailable; + float m_bokehMidX; float m_bokehMidY; float m_bokehDimension; @@ -39,6 +40,8 @@ class BokehBlurOperation : public NodeOperation, public QualityStepHelper { public: BokehBlurOperation(); + void init_data() override; + void *initializeTileData(rcti *rect) override; /** * The inner loop of this operation. @@ -79,6 +82,11 @@ class BokehBlurOperation : public NodeOperation, public QualityStepHelper { void determineResolution(unsigned int resolution[2], unsigned int preferredResolution[2]) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cc b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cc index 3804e6ec646..e0fc45811cb 100644 --- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cc +++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cc @@ -62,6 +62,13 @@ bool FastGaussianBlurOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void FastGaussianBlurOperation::init_data() +{ + BlurBaseOperation::init_data(); + this->m_sx = this->m_data.sizex * this->m_size / 2.0f; + this->m_sy = this->m_data.sizey * this->m_size / 2.0f; +} + void FastGaussianBlurOperation::initExecution() { BlurBaseOperation::initExecution(); @@ -117,6 +124,7 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, unsigned int chan, unsigned int xy) { + BLI_assert(!src->is_a_single_elem()); double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3]; double *X, 
*Y, *W; const unsigned int src_width = src->getWidth(); @@ -257,6 +265,64 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, #undef YVV } +void FastGaussianBlurOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case IMAGE_INPUT_INDEX: + r_input_area.xmin = 0; + r_input_area.xmax = getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = getHeight(); + break; + default: + BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area); + return; + } +} + +void FastGaussianBlurOperation::update_memory_buffer_started(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + /* TODO(manzanilla): Add a render test and make #IIR_gauss multi-threaded with support for + * an output buffer. */ + const MemoryBuffer *input = inputs[IMAGE_INPUT_INDEX]; + MemoryBuffer *image = nullptr; + const bool is_full_output = BLI_rcti_compare(&output->get_rect(), &area); + if (is_full_output) { + image = output; + } + else { + image = new MemoryBuffer(getOutputSocket()->getDataType(), area); + } + image->copy_from(input, area); + + if ((this->m_sx == this->m_sy) && (this->m_sx > 0.0f)) { + for (const int c : IndexRange(COM_DATA_TYPE_COLOR_CHANNELS)) { + IIR_gauss(image, this->m_sx, c, 3); + } + } + else { + if (this->m_sx > 0.0f) { + for (const int c : IndexRange(COM_DATA_TYPE_COLOR_CHANNELS)) { + IIR_gauss(image, this->m_sx, c, 1); + } + } + if (this->m_sy > 0.0f) { + for (const int c : IndexRange(COM_DATA_TYPE_COLOR_CHANNELS)) { + IIR_gauss(image, this->m_sy, c, 2); + } + } + } + + if (!is_full_output) { + output->copy_from(image, area); + delete image; + } +} + FastGaussianBlurValueOperation::FastGaussianBlurValueOperation() { this->addInputSocket(DataType::Value); @@ -341,4 +407,44 @@ void *FastGaussianBlurValueOperation::initializeTileData(rcti *rect) return this->m_iirgaus; } +void FastGaussianBlurValueOperation::get_area_of_interest(const int 
UNUSED(input_idx), + const rcti &UNUSED(output_area), + rcti &r_input_area) +{ + r_input_area.xmin = 0; + r_input_area.xmax = getWidth(); + r_input_area.ymin = 0; + r_input_area.ymax = getHeight(); +} + +void FastGaussianBlurValueOperation::update_memory_buffer_started(MemoryBuffer *UNUSED(output), + const rcti &UNUSED(area), + Span<MemoryBuffer *> inputs) +{ + if (m_iirgaus == nullptr) { + const MemoryBuffer *image = inputs[0]; + MemoryBuffer *gauss = new MemoryBuffer(*image); + FastGaussianBlurOperation::IIR_gauss(gauss, m_sigma, 0, 3); + m_iirgaus = gauss; + } +} + +void FastGaussianBlurValueOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + MemoryBuffer *image = inputs[0]; + BuffersIterator<float> it = output->iterate_with({image, m_iirgaus}, area); + if (this->m_overlay == FAST_GAUSS_OVERLAY_MIN) { + for (; !it.is_end(); ++it) { + *it.out = MIN2(*it.in(0), *it.in(1)); + } + } + else if (this->m_overlay == FAST_GAUSS_OVERLAY_MAX) { + for (; !it.is_end(); ++it) { + *it.out = MAX2(*it.in(0), *it.in(1)); + } + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h index c25afe6c4a4..f42fc76a119 100644 --- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h +++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h @@ -38,8 +38,19 @@ class FastGaussianBlurOperation : public BlurBaseOperation { static void IIR_gauss(MemoryBuffer *src, float sigma, unsigned int channel, unsigned int xy); void *initializeTileData(rcti *rect) override; + void init_data() override; void deinitExecution() override; void initExecution() override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_started(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) 
override; + void update_memory_buffer_partial(MemoryBuffer *UNUSED(output), + const rcti &UNUSED(area), + Span<MemoryBuffer *> UNUSED(inputs)) override + { + } }; enum { @@ -48,7 +59,7 @@ enum { FAST_GAUSS_OVERLAY_MAX = 1, }; -class FastGaussianBlurValueOperation : public NodeOperation { +class FastGaussianBlurValueOperation : public MultiThreadedOperation { private: float m_sigma; MemoryBuffer *m_iirgaus; @@ -80,6 +91,14 @@ class FastGaussianBlurValueOperation : public NodeOperation { { this->m_overlay = overlay; } + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_started(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GammaCorrectOperation.cc b/source/blender/compositor/operations/COM_GammaCorrectOperation.cc index 16b79fddd06..1bff3b965c6 100644 --- a/source/blender/compositor/operations/COM_GammaCorrectOperation.cc +++ b/source/blender/compositor/operations/COM_GammaCorrectOperation.cc @@ -26,6 +26,7 @@ GammaCorrectOperation::GammaCorrectOperation() this->addInputSocket(DataType::Color); this->addOutputSocket(DataType::Color); this->m_inputProgram = nullptr; + flags.can_be_constant = true; } void GammaCorrectOperation::initExecution() { @@ -58,6 +59,34 @@ void GammaCorrectOperation::executePixelSampled(float output[4], } } +void GammaCorrectOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + const MemoryBuffer *input = inputs[0]; + for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) { + float color[4]; + input->read_elem(it.x, it.y, color); + if (color[3] > 0.0f) { + color[0] /= color[3]; + color[1] /= color[3]; + color[2] /= color[3]; + } + + /* 
Check for negative to avoid nan's. */ + it.out[0] = color[0] > 0.0f ? color[0] * color[0] : 0.0f; + it.out[1] = color[1] > 0.0f ? color[1] * color[1] : 0.0f; + it.out[2] = color[2] > 0.0f ? color[2] * color[2] : 0.0f; + it.out[3] = color[3]; + + if (color[3] > 0.0f) { + it.out[0] *= color[3]; + it.out[1] *= color[3]; + it.out[2] *= color[3]; + } + } +} + void GammaCorrectOperation::deinitExecution() { this->m_inputProgram = nullptr; @@ -68,6 +97,7 @@ GammaUncorrectOperation::GammaUncorrectOperation() this->addInputSocket(DataType::Color); this->addOutputSocket(DataType::Color); this->m_inputProgram = nullptr; + flags.can_be_constant = true; } void GammaUncorrectOperation::initExecution() { @@ -100,6 +130,33 @@ void GammaUncorrectOperation::executePixelSampled(float output[4], } } +void GammaUncorrectOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + const MemoryBuffer *input = inputs[0]; + for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) { + float color[4]; + input->read_elem(it.x, it.y, color); + if (color[3] > 0.0f) { + color[0] /= color[3]; + color[1] /= color[3]; + color[2] /= color[3]; + } + + it.out[0] = color[0] > 0.0f ? sqrtf(color[0]) : 0.0f; + it.out[1] = color[1] > 0.0f ? sqrtf(color[1]) : 0.0f; + it.out[2] = color[2] > 0.0f ? 
sqrtf(color[2]) : 0.0f; + it.out[3] = color[3]; + + if (color[3] > 0.0f) { + it.out[0] *= color[3]; + it.out[1] *= color[3]; + it.out[2] *= color[3]; + } + } +} + void GammaUncorrectOperation::deinitExecution() { this->m_inputProgram = nullptr; diff --git a/source/blender/compositor/operations/COM_GammaCorrectOperation.h b/source/blender/compositor/operations/COM_GammaCorrectOperation.h index ac3d45b94b1..2a9fde70e87 100644 --- a/source/blender/compositor/operations/COM_GammaCorrectOperation.h +++ b/source/blender/compositor/operations/COM_GammaCorrectOperation.h @@ -18,11 +18,11 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" namespace blender::compositor { -class GammaCorrectOperation : public NodeOperation { +class GammaCorrectOperation : public MultiThreadedOperation { private: /** * Cached reference to the inputProgram @@ -46,9 +46,13 @@ class GammaCorrectOperation : public NodeOperation { * Deinitialize the execution */ void deinitExecution() override; + + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; -class GammaUncorrectOperation : public NodeOperation { +class GammaUncorrectOperation : public MultiThreadedOperation { private: /** * Cached reference to the inputProgram @@ -72,6 +76,10 @@ class GammaUncorrectOperation : public NodeOperation { * Deinitialize the execution */ void deinitExecution() override; + + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.cc b/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.cc new file mode 100644 index 00000000000..9bdc652b466 --- /dev/null +++ b/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.cc @@ -0,0 +1,168 @@ +/* + * This program is free software; you can redistribute 
it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#include "COM_GaussianAlphaBlurBaseOperation.h" + +namespace blender::compositor { + +GaussianAlphaBlurBaseOperation::GaussianAlphaBlurBaseOperation(eDimension dim) + : BlurBaseOperation(DataType::Value) +{ + this->m_gausstab = nullptr; + this->m_filtersize = 0; + this->m_falloff = -1; /* Intentionally invalid, so we can detect uninitialized values. 
*/ + dimension_ = dim; +} + +void GaussianAlphaBlurBaseOperation::init_data() +{ + BlurBaseOperation::init_data(); + if (execution_model_ == eExecutionModel::FullFrame) { + rad_ = max_ff(m_size * this->get_blur_size(dimension_), 0.0f); + rad_ = min_ff(rad_, MAX_GAUSSTAB_RADIUS); + m_filtersize = min_ii(ceil(rad_), MAX_GAUSSTAB_RADIUS); + } +} + +void GaussianAlphaBlurBaseOperation::initExecution() +{ + BlurBaseOperation::initExecution(); + if (execution_model_ == eExecutionModel::FullFrame) { + m_gausstab = BlurBaseOperation::make_gausstab(rad_, m_filtersize); + m_distbuf_inv = BlurBaseOperation::make_dist_fac_inverse(rad_, m_filtersize, m_falloff); + } +} + +void GaussianAlphaBlurBaseOperation::deinitExecution() +{ + BlurBaseOperation::deinitExecution(); + + if (this->m_gausstab) { + MEM_freeN(this->m_gausstab); + this->m_gausstab = nullptr; + } + + if (this->m_distbuf_inv) { + MEM_freeN(this->m_distbuf_inv); + this->m_distbuf_inv = nullptr; + } +} + +void GaussianAlphaBlurBaseOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + if (input_idx != IMAGE_INPUT_INDEX) { + BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area); + return; + } + + r_input_area = output_area; + switch (dimension_) { + case eDimension::X: + r_input_area.xmin = output_area.xmin - m_filtersize - 1; + r_input_area.xmax = output_area.xmax + m_filtersize + 1; + break; + case eDimension::Y: + r_input_area.ymin = output_area.ymin - m_filtersize - 1; + r_input_area.ymax = output_area.ymax + m_filtersize + 1; + break; + } +} + +BLI_INLINE float finv_test(const float f, const bool test) +{ + return (LIKELY(test == false)) ? 
f : 1.0f - f; +} + +void GaussianAlphaBlurBaseOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + MemoryBuffer *input = inputs[IMAGE_INPUT_INDEX]; + const rcti &input_rect = input->get_rect(); + BuffersIterator<float> it = output->iterate_with({input}, area); + + int min_input_coord = -1; + int max_input_coord = -1; + int elem_stride = -1; + std::function<int()> get_current_coord; + switch (dimension_) { + case eDimension::X: + min_input_coord = input_rect.xmin; + max_input_coord = input_rect.xmax; + get_current_coord = [&] { return it.x; }; + elem_stride = input->elem_stride; + break; + case eDimension::Y: + min_input_coord = input_rect.ymin; + max_input_coord = input_rect.ymax; + get_current_coord = [&] { return it.y; }; + elem_stride = input->row_stride; + break; + } + + for (; !it.is_end(); ++it) { + const int coord = get_current_coord(); + const int coord_min = max_ii(coord - m_filtersize, min_input_coord); + const int coord_max = min_ii(coord + m_filtersize + 1, max_input_coord); + + /* *** This is the main part which is different to #GaussianBlurBaseOperation. *** */ + /* Gauss. */ + float alpha_accum = 0.0f; + float multiplier_accum = 0.0f; + + /* Dilate. */ + const bool do_invert = m_do_subtract; + /* Init with the current color to avoid unneeded lookups. */ + float value_max = finv_test(*it.in(0), do_invert); + float distfacinv_max = 1.0f; /* 0 to 1 */ + + const int step = QualityStepHelper::getStep(); + const float *in = it.in(0) + ((intptr_t)coord_min - coord) * elem_stride; + const int in_stride = elem_stride * step; + int index = (coord_min - coord) + m_filtersize; + const int index_end = index + (coord_max - coord_min); + for (; index < index_end; in += in_stride, index += step) { + float value = finv_test(*in, do_invert); + + /* Gauss. 
*/ + float multiplier = m_gausstab[index]; + alpha_accum += value * multiplier; + multiplier_accum += multiplier; + + /* Dilate - find most extreme color. */ + if (value > value_max) { + multiplier = m_distbuf_inv[index]; + value *= multiplier; + if (value > value_max) { + value_max = value; + distfacinv_max = multiplier; + } + } + } + + /* Blend between the max value and gauss blue - gives nice feather. */ + const float value_blur = alpha_accum / multiplier_accum; + const float value_final = (value_max * distfacinv_max) + + (value_blur * (1.0f - distfacinv_max)); + *it.out = finv_test(value_final, do_invert); + } +} + +} // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.h b/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.h new file mode 100644 index 00000000000..d7ca975ca0a --- /dev/null +++ b/source/blender/compositor/operations/COM_GaussianAlphaBlurBaseOperation.h @@ -0,0 +1,62 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#pragma once + +#include "COM_BlurBaseOperation.h" + +namespace blender::compositor { + +class GaussianAlphaBlurBaseOperation : public BlurBaseOperation { + protected: + float *m_gausstab; + float *m_distbuf_inv; + int m_falloff; /* Falloff for #distbuf_inv. */ + bool m_do_subtract; + int m_filtersize; + float rad_; + eDimension dimension_; + + public: + GaussianAlphaBlurBaseOperation(eDimension dim); + + virtual void init_data() override; + virtual void initExecution() override; + virtual void deinitExecution() override; + + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) final; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) final; + + /** + * Set subtract for Dilate/Erode functionality + */ + void setSubtract(bool subtract) + { + this->m_do_subtract = subtract; + } + void setFalloff(int falloff) + { + this->m_falloff = falloff; + } +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.cc b/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.cc index 7ca5dc4ca76..6710ed3cf5b 100644 --- a/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.cc +++ b/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.cc @@ -24,11 +24,9 @@ namespace blender::compositor { -GaussianAlphaXBlurOperation::GaussianAlphaXBlurOperation() : BlurBaseOperation(DataType::Value) +GaussianAlphaXBlurOperation::GaussianAlphaXBlurOperation() + : GaussianAlphaBlurBaseOperation(eDimension::X) { - this->m_gausstab = nullptr; - this->m_filtersize = 0; - this->m_falloff = -1; /* intentionally invalid, so we can detect uninitialized values */ } void *GaussianAlphaXBlurOperation::initializeTileData(rcti * /*rect*/) @@ -44,12 +42,11 @@ void *GaussianAlphaXBlurOperation::initializeTileData(rcti * /*rect*/) void GaussianAlphaXBlurOperation::initExecution() { - /* Until we 
support size input - comment this. */ - // BlurBaseOperation::initExecution(); + GaussianAlphaBlurBaseOperation::initExecution(); initMutex(); - if (this->m_sizeavailable) { + if (this->m_sizeavailable && execution_model_ == eExecutionModel::Tiled) { float rad = max_ff(m_size * m_data.sizex, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); @@ -144,7 +141,7 @@ void GaussianAlphaXBlurOperation::executePixel(float output[4], int x, int y, vo void GaussianAlphaXBlurOperation::deinitExecution() { - BlurBaseOperation::deinitExecution(); + GaussianAlphaBlurBaseOperation::deinitExecution(); if (this->m_gausstab) { MEM_freeN(this->m_gausstab); diff --git a/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.h index 949956fae04..2a44c639665 100644 --- a/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianAlphaXBlurOperation.h @@ -18,18 +18,13 @@ #pragma once -#include "COM_BlurBaseOperation.h" -#include "COM_NodeOperation.h" +#include "COM_GaussianAlphaBlurBaseOperation.h" namespace blender::compositor { -class GaussianAlphaXBlurOperation : public BlurBaseOperation { +/* TODO(manzanilla): everything to be removed with tiled implementation except the constructor. 
*/ +class GaussianAlphaXBlurOperation : public GaussianAlphaBlurBaseOperation { private: - float *m_gausstab; - float *m_distbuf_inv; - int m_falloff; /* falloff for distbuf_inv */ - bool m_do_subtract; - int m_filtersize; void updateGauss(); public: @@ -54,18 +49,6 @@ class GaussianAlphaXBlurOperation : public BlurBaseOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; - - /** - * Set subtract for Dilate/Erode functionality - */ - void setSubtract(bool subtract) - { - this->m_do_subtract = subtract; - } - void setFalloff(int falloff) - { - this->m_falloff = falloff; - } }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.cc b/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.cc index d2385a972dd..09aeddb6573 100644 --- a/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.cc +++ b/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.cc @@ -24,11 +24,9 @@ namespace blender::compositor { -GaussianAlphaYBlurOperation::GaussianAlphaYBlurOperation() : BlurBaseOperation(DataType::Value) +GaussianAlphaYBlurOperation::GaussianAlphaYBlurOperation() + : GaussianAlphaBlurBaseOperation(eDimension::Y) { - this->m_gausstab = nullptr; - this->m_filtersize = 0; - this->m_falloff = -1; /* intentionally invalid, so we can detect uninitialized values */ } void *GaussianAlphaYBlurOperation::initializeTileData(rcti * /*rect*/) @@ -42,14 +40,14 @@ void *GaussianAlphaYBlurOperation::initializeTileData(rcti * /*rect*/) return buffer; } +/* TODO(manzanilla): to be removed with tiled implementation. */ void GaussianAlphaYBlurOperation::initExecution() { - /* Until we support size input - comment this. 
*/ - // BlurBaseOperation::initExecution(); + GaussianAlphaBlurBaseOperation::initExecution(); initMutex(); - if (this->m_sizeavailable) { + if (this->m_sizeavailable && execution_model_ == eExecutionModel::Tiled) { float rad = max_ff(m_size * m_data.sizey, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); @@ -58,6 +56,7 @@ void GaussianAlphaYBlurOperation::initExecution() } } +/* TODO(manzanilla): to be removed with tiled implementation. */ void GaussianAlphaYBlurOperation::updateGauss() { if (this->m_gausstab == nullptr) { @@ -143,7 +142,7 @@ void GaussianAlphaYBlurOperation::executePixel(float output[4], int x, int y, vo void GaussianAlphaYBlurOperation::deinitExecution() { - BlurBaseOperation::deinitExecution(); + GaussianAlphaBlurBaseOperation::deinitExecution(); if (this->m_gausstab) { MEM_freeN(this->m_gausstab); diff --git a/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.h index d25770386c4..ef01f7e0f92 100644 --- a/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianAlphaYBlurOperation.h @@ -18,18 +18,13 @@ #pragma once -#include "COM_BlurBaseOperation.h" -#include "COM_NodeOperation.h" +#include "COM_GaussianAlphaBlurBaseOperation.h" namespace blender::compositor { -class GaussianAlphaYBlurOperation : public BlurBaseOperation { +/* TODO(manzanilla): everything to be removed with tiled implementation except the constructor. 
*/ +class GaussianAlphaYBlurOperation : public GaussianAlphaBlurBaseOperation { private: - float *m_gausstab; - float *m_distbuf_inv; - bool m_do_subtract; - int m_falloff; - int m_filtersize; void updateGauss(); public: @@ -54,18 +49,6 @@ class GaussianAlphaYBlurOperation : public BlurBaseOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; - - /** - * Set subtract for Dilate/Erode functionality - */ - void setSubtract(bool subtract) - { - this->m_do_subtract = subtract; - } - void setFalloff(int falloff) - { - this->m_falloff = falloff; - } }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc new file mode 100644 index 00000000000..959f599fab4 --- /dev/null +++ b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.cc @@ -0,0 +1,154 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#include "COM_GaussianBlurBaseOperation.h" + +namespace blender::compositor { + +GaussianBlurBaseOperation::GaussianBlurBaseOperation(eDimension dim) + : BlurBaseOperation(DataType::Color) +{ + m_gausstab = nullptr; +#ifdef BLI_HAVE_SSE2 + m_gausstab_sse = nullptr; +#endif + m_filtersize = 0; + rad_ = 0.0f; + dimension_ = dim; +} + +void GaussianBlurBaseOperation::init_data() +{ + BlurBaseOperation::init_data(); + if (execution_model_ == eExecutionModel::FullFrame) { + rad_ = max_ff(m_size * this->get_blur_size(dimension_), 0.0f); + rad_ = min_ff(rad_, MAX_GAUSSTAB_RADIUS); + m_filtersize = min_ii(ceil(rad_), MAX_GAUSSTAB_RADIUS); + } +} + +void GaussianBlurBaseOperation::initExecution() +{ + BlurBaseOperation::initExecution(); + if (execution_model_ == eExecutionModel::FullFrame) { + m_gausstab = BlurBaseOperation::make_gausstab(rad_, m_filtersize); +#ifdef BLI_HAVE_SSE2 + m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(m_gausstab, m_filtersize); +#endif + } +} + +void GaussianBlurBaseOperation::deinitExecution() +{ + BlurBaseOperation::deinitExecution(); + + if (m_gausstab) { + MEM_freeN(m_gausstab); + m_gausstab = nullptr; + } +#ifdef BLI_HAVE_SSE2 + if (m_gausstab_sse) { + MEM_freeN(m_gausstab_sse); + m_gausstab_sse = nullptr; + } +#endif +} + +void GaussianBlurBaseOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + if (input_idx != IMAGE_INPUT_INDEX) { + BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area); + return; + } + + r_input_area = output_area; + switch (dimension_) { + case eDimension::X: + r_input_area.xmin = output_area.xmin - m_filtersize - 1; + r_input_area.xmax = output_area.xmax + m_filtersize + 1; + break; + case eDimension::Y: + r_input_area.ymin = output_area.ymin - m_filtersize - 1; + r_input_area.ymax = output_area.ymax + m_filtersize + 1; + break; + } +} + +void GaussianBlurBaseOperation::update_memory_buffer_partial(MemoryBuffer *output, + 
const rcti &area, + Span<MemoryBuffer *> inputs) +{ + MemoryBuffer *input = inputs[IMAGE_INPUT_INDEX]; + const rcti &input_rect = input->get_rect(); + BuffersIterator<float> it = output->iterate_with({input}, area); + + int min_input_coord = -1; + int max_input_coord = -1; + int elem_stride = -1; + /* The blur axis never changes while iterating, so resolve it once here rather than + * paying for a type-erased std::function call on every pixel. */ + const bool is_x_axis = (dimension_ == eDimension::X); + switch (dimension_) { + case eDimension::X: + min_input_coord = input_rect.xmin; + max_input_coord = input_rect.xmax; + elem_stride = input->elem_stride; + break; + case eDimension::Y: + min_input_coord = input_rect.ymin; + max_input_coord = input_rect.ymax; + elem_stride = input->row_stride; + break; + } + + for (; !it.is_end(); ++it) { + const int coord = is_x_axis ? it.x : it.y; + const int coord_min = max_ii(coord - m_filtersize, min_input_coord); + const int coord_max = min_ii(coord + m_filtersize + 1, max_input_coord); + + float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + float multiplier_accum = 0.0f; + + const int step = QualityStepHelper::getStep(); + const float *in = it.in(0) + ((intptr_t)coord_min - coord) * elem_stride; + const int in_stride = elem_stride * step; + int gauss_idx = (coord_min - coord) + m_filtersize; + const int gauss_end = gauss_idx + (coord_max - coord_min); +#ifdef BLI_HAVE_SSE2 + __m128 accum_r = _mm_load_ps(color_accum); + for (; gauss_idx < gauss_end; in += in_stride, gauss_idx += step) { + __m128 reg_a = _mm_load_ps(in); + reg_a = _mm_mul_ps(reg_a, m_gausstab_sse[gauss_idx]); + accum_r = _mm_add_ps(accum_r, reg_a); + multiplier_accum += m_gausstab[gauss_idx]; + } + _mm_store_ps(color_accum, accum_r); +#else + for (; gauss_idx < gauss_end; in += in_stride, gauss_idx += step) { + const float multiplier = m_gausstab[gauss_idx]; + madd_v4_v4fl(color_accum, in, multiplier); + multiplier_accum += multiplier; + } +#endif + mul_v4_v4fl(it.out, color_accum, 1.0f / multiplier_accum); + } +} + +} // 
namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.h b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.h new file mode 100644 index 00000000000..c0b27078a24 --- /dev/null +++ b/source/blender/compositor/operations/COM_GaussianBlurBaseOperation.h @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. 
+ */ + +#pragma once + +#include "COM_BlurBaseOperation.h" + +namespace blender::compositor { + +class GaussianBlurBaseOperation : public BlurBaseOperation { + protected: + float *m_gausstab; +#ifdef BLI_HAVE_SSE2 + __m128 *m_gausstab_sse; +#endif + int m_filtersize; + float rad_; + eDimension dimension_; + + public: + GaussianBlurBaseOperation(eDimension dim); + + virtual void init_data() override; + virtual void initExecution() override; + virtual void deinitExecution() override; + + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) override; + virtual void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; +}; + +} // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.cc b/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.cc index b2c65ff2c96..5dc97b3d5f6 100644 --- a/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.cc +++ b/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.cc @@ -40,6 +40,28 @@ void *GaussianBokehBlurOperation::initializeTileData(rcti * /*rect*/) return buffer; } +void GaussianBokehBlurOperation::init_data() +{ + BlurBaseOperation::init_data(); + const float width = this->getWidth(); + const float height = this->getHeight(); + + /* TODO(manzanilla): to be removed with tiled implementation. 
*/ + if (!this->m_sizeavailable) { + updateSize(); + } + + radxf_ = this->m_size * (float)this->m_data.sizex; + CLAMP(radxf_, 0.0f, width / 2.0f); + + /* vertical */ + radyf_ = this->m_size * (float)this->m_data.sizey; + CLAMP(radyf_, 0.0f, height / 2.0f); + + this->m_radx = ceil(radxf_); + this->m_rady = ceil(radyf_); +} + void GaussianBokehBlurOperation::initExecution() { BlurBaseOperation::initExecution(); @@ -54,39 +76,17 @@ void GaussianBokehBlurOperation::initExecution() void GaussianBokehBlurOperation::updateGauss() { if (this->m_gausstab == nullptr) { - float radxf; - float radyf; - int n; - float *dgauss; - float *ddgauss; - int j, i; - const float width = this->getWidth(); - const float height = this->getHeight(); - if (!this->m_sizeavailable) { - updateSize(); - } - radxf = this->m_size * (float)this->m_data.sizex; - CLAMP(radxf, 0.0f, width / 2.0f); - - /* vertical */ - radyf = this->m_size * (float)this->m_data.sizey; - CLAMP(radyf, 0.0f, height / 2.0f); - - this->m_radx = ceil(radxf); - this->m_rady = ceil(radyf); - int ddwidth = 2 * this->m_radx + 1; int ddheight = 2 * this->m_rady + 1; - n = ddwidth * ddheight; - + int n = ddwidth * ddheight; /* create a full filter image */ - ddgauss = (float *)MEM_mallocN(sizeof(float) * n, __func__); - dgauss = ddgauss; + float *ddgauss = (float *)MEM_mallocN(sizeof(float) * n, __func__); + float *dgauss = ddgauss; float sum = 0.0f; - float facx = (radxf > 0.0f ? 1.0f / radxf : 0.0f); - float facy = (radyf > 0.0f ? 1.0f / radyf : 0.0f); - for (j = -this->m_rady; j <= this->m_rady; j++) { - for (i = -this->m_radx; i <= this->m_radx; i++, dgauss++) { + float facx = (radxf_ > 0.0f ? 1.0f / radxf_ : 0.0f); + float facy = (radyf_ > 0.0f ? 
1.0f / radyf_ : 0.0f); + for (int j = -this->m_rady; j <= this->m_rady; j++) { + for (int i = -this->m_radx; i <= this->m_radx; i++, dgauss++) { float fj = (float)j * facy; float fi = (float)i * facx; float dist = sqrt(fj * fj + fi * fi); @@ -99,7 +99,7 @@ void GaussianBokehBlurOperation::updateGauss() if (sum > 0.0f) { /* normalize */ float norm = 1.0f / sum; - for (j = n - 1; j >= 0; j--) { + for (int j = n - 1; j >= 0; j--) { ddgauss[j] *= norm; } } @@ -196,23 +196,69 @@ bool GaussianBokehBlurOperation::determineDependingAreaOfInterest( return BlurBaseOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void GaussianBokehBlurOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + if (input_idx != IMAGE_INPUT_INDEX) { + BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area); + return; + } + + r_input_area.xmax = output_area.xmax + m_radx; + r_input_area.xmin = output_area.xmin - m_radx; + r_input_area.ymax = output_area.ymax + m_rady; + r_input_area.ymin = output_area.ymin - m_rady; +} + +void GaussianBokehBlurOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + const MemoryBuffer *input = inputs[IMAGE_INPUT_INDEX]; + BuffersIterator<float> it = output->iterate_with({}, area); + const rcti &input_rect = input->get_rect(); + for (; !it.is_end(); ++it) { + const int x = it.x; + const int y = it.y; + + const int ymin = max_ii(y - this->m_rady, input_rect.ymin); + const int ymax = min_ii(y + this->m_rady + 1, input_rect.ymax); + const int xmin = max_ii(x - this->m_radx, input_rect.xmin); + const int xmax = min_ii(x + this->m_radx + 1, input_rect.xmax); + + float tempColor[4] = {0}; + float multiplier_accum = 0; + const int step = QualityStepHelper::getStep(); + const int elem_step = step * input->elem_stride; + const int add_const = (xmin - x + this->m_radx); + const int mul_const = (this->m_radx * 2 + 
1); + for (int ny = ymin; ny < ymax; ny += step) { + const float *color = input->get_elem(xmin, ny); + int gauss_index = ((ny - y) + this->m_rady) * mul_const + add_const; + const int gauss_end = gauss_index + (xmax - xmin); + for (; gauss_index < gauss_end; gauss_index += step, color += elem_step) { + const float multiplier = this->m_gausstab[gauss_index]; + madd_v4_v4fl(tempColor, color, multiplier); + multiplier_accum += multiplier; + } + } + + mul_v4_v4fl(it.out, tempColor, 1.0f / multiplier_accum); + } +} + // reference image GaussianBlurReferenceOperation::GaussianBlurReferenceOperation() : BlurBaseOperation(DataType::Color) { this->m_maintabs = nullptr; + use_variable_size_ = true; } -void *GaussianBlurReferenceOperation::initializeTileData(rcti * /*rect*/) -{ - void *buffer = getInputOperation(0)->initializeTileData(nullptr); - return buffer; -} - -void GaussianBlurReferenceOperation::initExecution() +void GaussianBlurReferenceOperation::init_data() { - BlurBaseOperation::initExecution(); - // setup gaustab + /* Setup variables for gausstab and area of interest. */ this->m_data.image_in_width = this->getWidth(); this->m_data.image_in_height = this->getHeight(); if (this->m_data.relative) { @@ -232,7 +278,7 @@ void GaussianBlurReferenceOperation::initExecution() } } - /* horizontal */ + /* Horizontal. */ m_filtersizex = (float)this->m_data.sizex; int imgx = getWidth() / 2; if (m_filtersizex > imgx) { @@ -243,7 +289,7 @@ void GaussianBlurReferenceOperation::initExecution() } m_radx = (float)m_filtersizex; - /* vertical */ + /* Vertical. 
*/ m_filtersizey = (float)this->m_data.sizey; int imgy = getHeight() / 2; if (m_filtersizey > imgy) { @@ -253,6 +299,18 @@ void GaussianBlurReferenceOperation::initExecution() m_filtersizey = 1; } m_rady = (float)m_filtersizey; +} + +void *GaussianBlurReferenceOperation::initializeTileData(rcti * /*rect*/) +{ + void *buffer = getInputOperation(0)->initializeTileData(nullptr); + return buffer; +} + +void GaussianBlurReferenceOperation::initExecution() +{ + BlurBaseOperation::initExecution(); + updateGauss(); } @@ -363,4 +421,78 @@ bool GaussianBlurReferenceOperation::determineDependingAreaOfInterest( return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } +void GaussianBlurReferenceOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + if (input_idx != IMAGE_INPUT_INDEX) { + BlurBaseOperation::get_area_of_interest(input_idx, output_area, r_input_area); + return; + } + + const int add_x = this->m_data.sizex + 2; + const int add_y = this->m_data.sizey + 2; + r_input_area.xmax = output_area.xmax + add_x; + r_input_area.xmin = output_area.xmin - add_x; + r_input_area.ymax = output_area.ymax + add_y; + r_input_area.ymin = output_area.ymin - add_y; +} + +void GaussianBlurReferenceOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + const MemoryBuffer *image_input = inputs[IMAGE_INPUT_INDEX]; + MemoryBuffer *size_input = inputs[SIZE_INPUT_INDEX]; + for (BuffersIterator<float> it = output->iterate_with({size_input}, area); !it.is_end(); ++it) { + const float ref_size = *it.in(0); + int ref_radx = (int)(ref_size * m_radx); + int ref_rady = (int)(ref_size * m_rady); + if (ref_radx > m_filtersizex) { + ref_radx = m_filtersizex; + } + else if (ref_radx < 1) { + ref_radx = 1; + } + if (ref_rady > m_filtersizey) { + ref_rady = m_filtersizey; + } + else if (ref_rady < 1) { + ref_rady = 1; + } + + const int x = it.x; + const int y 
= it.y; + if (ref_radx == 1 && ref_rady == 1) { + image_input->read_elem(x, y, it.out); + continue; + } + + const int w = getWidth(); + const int height = getHeight(); + const int minxr = x - ref_radx < 0 ? -x : -ref_radx; + const int maxxr = x + ref_radx > w ? w - x : ref_radx; + const int minyr = y - ref_rady < 0 ? -y : -ref_rady; + const int maxyr = y + ref_rady > height ? height - y : ref_rady; + + const float *gausstabx = m_maintabs[ref_radx - 1]; + const float *gausstabcentx = gausstabx + ref_radx; + const float *gausstaby = m_maintabs[ref_rady - 1]; + const float *gausstabcenty = gausstaby + ref_rady; + + float gauss_sum = 0.0f; + float color_sum[4] = {0}; + const float *row_color = image_input->get_elem(x + minxr, y + minyr); + for (int i = minyr; i < maxyr; i++, row_color += image_input->row_stride) { + const float *color = row_color; + for (int j = minxr; j < maxxr; j++, color += image_input->elem_stride) { + const float val = gausstabcenty[i] * gausstabcentx[j]; + gauss_sum += val; + madd_v4_v4fl(color_sum, color, val); + } + } + mul_v4_v4fl(it.out, color_sum, 1.0f / gauss_sum); + } +} + } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.h b/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.h index 59ba3d06619..a64b5b327b0 100644 --- a/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianBokehBlurOperation.h @@ -28,10 +28,13 @@ class GaussianBokehBlurOperation : public BlurBaseOperation { private: float *m_gausstab; int m_radx, m_rady; + float radxf_; + float radyf_; void updateGauss(); public: GaussianBokehBlurOperation(); + void init_data() override; void initExecution() override; void *initializeTileData(rcti *rect) override; /** @@ -47,6 +50,13 @@ class GaussianBokehBlurOperation : public BlurBaseOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti 
*output) override; + + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; class GaussianBlurReferenceOperation : public BlurBaseOperation { @@ -61,6 +71,7 @@ class GaussianBlurReferenceOperation : public BlurBaseOperation { public: GaussianBlurReferenceOperation(); + void init_data() override; void initExecution() override; void *initializeTileData(rcti *rect) override; /** @@ -76,6 +87,13 @@ class GaussianBlurReferenceOperation : public BlurBaseOperation { bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override; + + void get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cc b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cc index 4b46cfc8776..8d686265231 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cc +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cc @@ -25,13 +25,8 @@ namespace blender::compositor { -GaussianXBlurOperation::GaussianXBlurOperation() : BlurBaseOperation(DataType::Color) +GaussianXBlurOperation::GaussianXBlurOperation() : GaussianBlurBaseOperation(eDimension::X) { - this->m_gausstab = nullptr; -#ifdef BLI_HAVE_SSE2 - this->m_gausstab_sse = nullptr; -#endif - this->m_filtersize = 0; } void *GaussianXBlurOperation::initializeTileData(rcti * /*rect*/) @@ -45,13 +40,14 @@ void *GaussianXBlurOperation::initializeTileData(rcti * /*rect*/) return buffer; } +/* TODO(manzanilla): to be removed with tiled implementation. 
*/ void GaussianXBlurOperation::initExecution() { - BlurBaseOperation::initExecution(); + GaussianBlurBaseOperation::initExecution(); initMutex(); - if (this->m_sizeavailable) { + if (this->m_sizeavailable && execution_model_ == eExecutionModel::Tiled) { float rad = max_ff(m_size * m_data.sizex, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); @@ -63,6 +59,7 @@ void GaussianXBlurOperation::initExecution() } } +/* TODO(manzanilla): to be removed with tiled implementation. */ void GaussianXBlurOperation::updateGauss() { if (this->m_gausstab == nullptr) { @@ -158,7 +155,7 @@ void GaussianXBlurOperation::executeOpenCL(OpenCLDevice *device, void GaussianXBlurOperation::deinitExecution() { - BlurBaseOperation::deinitExecution(); + GaussianBlurBaseOperation::deinitExecution(); if (this->m_gausstab) { MEM_freeN(this->m_gausstab); diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h index 15277f0a42d..e09e57bad67 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h @@ -18,18 +18,13 @@ #pragma once -#include "COM_BlurBaseOperation.h" -#include "COM_NodeOperation.h" +#include "COM_GaussianBlurBaseOperation.h" namespace blender::compositor { -class GaussianXBlurOperation : public BlurBaseOperation { +/* TODO(manzanilla): everything to be removed with tiled implementation except the constructor. 
*/ +class GaussianXBlurOperation : public GaussianBlurBaseOperation { private: - float *m_gausstab; -#ifdef BLI_HAVE_SSE2 - __m128 *m_gausstab_sse; -#endif - int m_filtersize; void updateGauss(); public: diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cc b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cc index 590ac5faa6a..32d469a0ae4 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cc +++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cc @@ -25,13 +25,8 @@ namespace blender::compositor { -GaussianYBlurOperation::GaussianYBlurOperation() : BlurBaseOperation(DataType::Color) +GaussianYBlurOperation::GaussianYBlurOperation() : GaussianBlurBaseOperation(eDimension::Y) { - this->m_gausstab = nullptr; -#ifdef BLI_HAVE_SSE2 - this->m_gausstab_sse = nullptr; -#endif - this->m_filtersize = 0; } void *GaussianYBlurOperation::initializeTileData(rcti * /*rect*/) @@ -47,11 +42,11 @@ void *GaussianYBlurOperation::initializeTileData(rcti * /*rect*/) void GaussianYBlurOperation::initExecution() { - BlurBaseOperation::initExecution(); + GaussianBlurBaseOperation::initExecution(); initMutex(); - if (this->m_sizeavailable) { + if (this->m_sizeavailable && execution_model_ == eExecutionModel::Tiled) { float rad = max_ff(m_size * m_data.sizey, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); @@ -158,7 +153,7 @@ void GaussianYBlurOperation::executeOpenCL(OpenCLDevice *device, void GaussianYBlurOperation::deinitExecution() { - BlurBaseOperation::deinitExecution(); + GaussianBlurBaseOperation::deinitExecution(); if (this->m_gausstab) { MEM_freeN(this->m_gausstab); diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h index 56d40849ba4..bb33f8b74cb 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h +++ 
b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h @@ -18,18 +18,13 @@ #pragma once -#include "COM_BlurBaseOperation.h" -#include "COM_NodeOperation.h" +#include "COM_GaussianBlurBaseOperation.h" namespace blender::compositor { -class GaussianYBlurOperation : public BlurBaseOperation { +/* TODO(manzanilla): everything to be removed with tiled implementation except the constructor. */ +class GaussianYBlurOperation : public GaussianBlurBaseOperation { private: - float *m_gausstab; -#ifdef BLI_HAVE_SSE2 - __m128 *m_gausstab_sse; -#endif - int m_filtersize; void updateGauss(); public: diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cc b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cc index 19cd5a53084..ec456bd96f1 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cc +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cc @@ -18,6 +18,7 @@ #include "COM_VariableSizeBokehBlurOperation.h" #include "BLI_math.h" +#include "COM_ExecutionSystem.h" #include "COM_OpenCLDevice.h" #include "RE_pipeline.h" @@ -276,6 +277,166 @@ bool VariableSizeBokehBlurOperation::determineDependingAreaOfInterest( return false; } +void VariableSizeBokehBlurOperation::get_area_of_interest(const int input_idx, + const rcti &output_area, + rcti &r_input_area) +{ + switch (input_idx) { + case IMAGE_INPUT_INDEX: + case SIZE_INPUT_INDEX: { + const float max_dim = MAX2(getWidth(), getHeight()); + const float scalar = m_do_size_scale ? 
(max_dim / 100.0f) : 1.0f; + const int max_blur_scalar = m_maxBlur * scalar; + /* Grow the output area by the same amount on every side: blur radius and a 2 pixel margin. */ + r_input_area.xmax = output_area.xmax + max_blur_scalar + 2; + r_input_area.xmin = output_area.xmin - max_blur_scalar - 2; + r_input_area.ymax = output_area.ymax + max_blur_scalar + 2; + r_input_area.ymin = output_area.ymin - max_blur_scalar - 2; + break; + } + case BOKEH_INPUT_INDEX: { + r_input_area.xmax = COM_BLUR_BOKEH_PIXELS; + r_input_area.xmin = 0; + r_input_area.ymax = COM_BLUR_BOKEH_PIXELS; + r_input_area.ymin = 0; + break; + } +#ifdef COM_DEFOCUS_SEARCH + case DEFOCUS_INPUT_INDEX: { + r_input_area.xmax = (output_area.xmax / InverseSearchRadiusOperation::DIVIDER) + 1; + r_input_area.xmin = (output_area.xmin / InverseSearchRadiusOperation::DIVIDER) - 1; + r_input_area.ymax = (output_area.ymax / InverseSearchRadiusOperation::DIVIDER) + 1; + r_input_area.ymin = (output_area.ymin / InverseSearchRadiusOperation::DIVIDER) - 1; + break; + } +#endif + } +} + +struct PixelData { + float multiplier_accum[4]; + float color_accum[4]; + float threshold; + float scalar; + float size_center; + int max_blur_scalar; + int step; + MemoryBuffer *bokeh_input; + MemoryBuffer *size_input; + MemoryBuffer *image_input; + int image_width; + int image_height; +}; + +static void blur_pixel(int x, int y, PixelData &p) +{ + BLI_assert(p.bokeh_input->getWidth() == COM_BLUR_BOKEH_PIXELS); + BLI_assert(p.bokeh_input->getHeight() == COM_BLUR_BOKEH_PIXELS); + +#ifdef COM_DEFOCUS_SEARCH + float search[4]; + inputs[DEFOCUS_INPUT_INDEX]->read_elem_checked(x / InverseSearchRadiusOperation::DIVIDER, + y / InverseSearchRadiusOperation::DIVIDER, + search); + const int minx = search[0]; + const int miny = search[1]; + const int maxx = search[2]; + const int maxy = search[3]; +#else + const int minx = MAX2(x - p.max_blur_scalar, 0); + const int miny = MAX2(y - p.max_blur_scalar, 0); + const int maxx = MIN2(x + p.max_blur_scalar, p.image_width); + const int maxy = MIN2(y + p.max_blur_scalar, p.image_height); +#endif + + 
const int color_row_stride = p.image_input->row_stride * p.step; + const int color_elem_stride = p.image_input->elem_stride * p.step; + const int size_row_stride = p.size_input->row_stride * p.step; + const int size_elem_stride = p.size_input->elem_stride * p.step; + const float *row_color = p.image_input->get_elem(minx, miny); + const float *row_size = p.size_input->get_elem(minx, miny); + for (int ny = miny; ny < maxy; + ny += p.step, row_size += size_row_stride, row_color += color_row_stride) { + const float dy = ny - y; + const float *size_elem = row_size; + const float *color = row_color; + for (int nx = minx; nx < maxx; + nx += p.step, size_elem += size_elem_stride, color += color_elem_stride) { + if (nx == x && ny == y) { + continue; + } + const float size = MIN2(size_elem[0] * p.scalar, p.size_center); + if (size <= p.threshold) { + continue; + } + const float dx = nx - x; + if (size <= fabsf(dx) || size <= fabsf(dy)) { + continue; + } + + /* XXX: There is no way to ensure bokeh input is an actual bokeh with #COM_BLUR_BOKEH_PIXELS + * size, anything may be connected. Use the real input size and remove asserts? 
*/ + const float u = (float)(COM_BLUR_BOKEH_PIXELS / 2) + + (dx / size) * (float)((COM_BLUR_BOKEH_PIXELS / 2) - 1); + const float v = (float)(COM_BLUR_BOKEH_PIXELS / 2) + + (dy / size) * (float)((COM_BLUR_BOKEH_PIXELS / 2) - 1); + float bokeh[4]; + p.bokeh_input->read_elem_checked(u, v, bokeh); + madd_v4_v4v4(p.color_accum, bokeh, color); + add_v4_v4(p.multiplier_accum, bokeh); + } + } +} + +void VariableSizeBokehBlurOperation::update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) +{ + PixelData p; + p.bokeh_input = inputs[BOKEH_INPUT_INDEX]; + p.size_input = inputs[SIZE_INPUT_INDEX]; + p.image_input = inputs[IMAGE_INPUT_INDEX]; + p.step = QualityStepHelper::getStep(); + p.threshold = m_threshold; + p.image_width = this->getWidth(); + p.image_height = this->getHeight(); + + rcti scalar_area; + this->get_area_of_interest(SIZE_INPUT_INDEX, area, scalar_area); + BLI_rcti_isect(&scalar_area, &p.size_input->get_rect(), &scalar_area); + const float max_size = p.size_input->get_max_value(scalar_area); + + const float max_dim = MAX2(this->getWidth(), this->getHeight()); + p.scalar = m_do_size_scale ? (max_dim / 100.0f) : 1.0f; + p.max_blur_scalar = static_cast<int>(max_size * p.scalar); + CLAMP(p.max_blur_scalar, 1, m_maxBlur); + + for (BuffersIterator<float> it = output->iterate_with({p.image_input, p.size_input}, area); + !it.is_end(); + ++it) { + const float *color = it.in(0); + const float size = *it.in(1); + copy_v4_v4(p.color_accum, color); + copy_v4_fl(p.multiplier_accum, 1.0f); + p.size_center = size * p.scalar; + + if (p.size_center > p.threshold) { + blur_pixel(it.x, it.y, p); + } + + it.out[0] = p.color_accum[0] / p.multiplier_accum[0]; + it.out[1] = p.color_accum[1] / p.multiplier_accum[1]; + it.out[2] = p.color_accum[2] / p.multiplier_accum[2]; + it.out[3] = p.color_accum[3] / p.multiplier_accum[3]; + + /* Blend in out values over the threshold, otherwise we get sharp, ugly transitions. 
*/ + if ((p.size_center > p.threshold) && (p.size_center < p.threshold * 2.0f)) { + /* Factor from 0-1. */ + const float fac = (p.size_center - p.threshold) / p.threshold; + interp_v4_v4v4(it.out, color, it.out, fac); + } + } +} + #ifdef COM_DEFOCUS_SEARCH // InverseSearchRadiusOperation InverseSearchRadiusOperation::InverseSearchRadiusOperation() diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h index baeab6a646e..d6df9f5b858 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h @@ -18,15 +18,22 @@ #pragma once -#include "COM_NodeOperation.h" +#include "COM_MultiThreadedOperation.h" #include "COM_QualityStepHelper.h" namespace blender::compositor { //#define COM_DEFOCUS_SEARCH -class VariableSizeBokehBlurOperation : public NodeOperation, public QualityStepHelper { +class VariableSizeBokehBlurOperation : public MultiThreadedOperation, public QualityStepHelper { private: + static constexpr int IMAGE_INPUT_INDEX = 0; + static constexpr int BOKEH_INPUT_INDEX = 1; + static constexpr int SIZE_INPUT_INDEX = 2; +#ifdef COM_DEFOCUS_SEARCH + static constexpr int DEFOCUS_INPUT_INDEX = 3; +#endif + int m_maxBlur; float m_threshold; bool m_do_size_scale; /* scale size, matching 'BokehBlurNode' */ @@ -84,8 +91,14 @@ class VariableSizeBokehBlurOperation : public NodeOperation, public QualityStepH MemoryBuffer **inputMemoryBuffers, std::list<cl_mem> *clMemToCleanUp, std::list<cl_kernel> *clKernelsToCleanUp) override; + + void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; + void update_memory_buffer_partial(MemoryBuffer *output, + const rcti &area, + Span<MemoryBuffer *> inputs) override; }; +/* Currently unused. If ever used, it needs fullframe implementation. 
*/ #ifdef COM_DEFOCUS_SEARCH class InverseSearchRadiusOperation : public NodeOperation { private: |