diff options
Diffstat (limited to 'source/blender/compositor')
7 files changed, 98 insertions, 6 deletions
diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.cpp b/source/blender/compositor/intern/COM_MemoryBuffer.cpp index 04828bfe3f8..c1916f4a68f 100644 --- a/source/blender/compositor/intern/COM_MemoryBuffer.cpp +++ b/source/blender/compositor/intern/COM_MemoryBuffer.cpp @@ -46,7 +46,7 @@ MemoryBuffer::MemoryBuffer(MemoryProxy *memoryProxy, unsigned int chunkNumber, r BLI_rcti_init(&this->m_rect, rect->xmin, rect->xmax, rect->ymin, rect->ymax); this->m_memoryProxy = memoryProxy; this->m_chunkNumber = chunkNumber; - this->m_buffer = (float *)MEM_mallocN(sizeof(float) * determineBufferSize() * COM_NUMBER_OF_CHANNELS, "COM_MemoryBuffer"); + this->m_buffer = (float *)MEM_mallocN_aligned(sizeof(float) * determineBufferSize() * COM_NUMBER_OF_CHANNELS, 16, "COM_MemoryBuffer"); this->m_state = COM_MB_ALLOCATED; this->m_datatype = COM_DT_COLOR; this->m_chunkWidth = this->m_rect.xmax - this->m_rect.xmin; @@ -57,7 +57,7 @@ MemoryBuffer::MemoryBuffer(MemoryProxy *memoryProxy, rcti *rect) BLI_rcti_init(&this->m_rect, rect->xmin, rect->xmax, rect->ymin, rect->ymax); this->m_memoryProxy = memoryProxy; this->m_chunkNumber = -1; - this->m_buffer = (float *)MEM_mallocN(sizeof(float) * determineBufferSize() * COM_NUMBER_OF_CHANNELS, "COM_MemoryBuffer"); + this->m_buffer = (float *)MEM_mallocN_aligned(sizeof(float) * determineBufferSize() * COM_NUMBER_OF_CHANNELS, 16, "COM_MemoryBuffer"); this->m_state = COM_MB_TEMPORARILY; this->m_datatype = COM_DT_COLOR; this->m_chunkWidth = this->m_rect.xmax - this->m_rect.xmin; diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp index e7af9319f88..d5aafc7c2ae 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp @@ -91,6 +91,18 @@ float *BlurBaseOperation::make_gausstab(float rad, int size) return gausstab; } +#ifdef __SSE2__ +__m128 *BlurBaseOperation::convert_gausstab_sse(const float *gausstab, float rad, int size) +{ + int n = 2 * size + 1; + __m128 *gausstab_sse = (__m128 *) MEM_mallocN_aligned(sizeof(__m128) * n, 16, "gausstab sse"); + for (int i = 0; i < n; ++i) { + gausstab_sse[i] = _mm_set1_ps(gausstab[i]); + } + return gausstab_sse; +} +#endif + /* normalized distance from the current (inverted so 1.0 is close and 0.0 is far) * 'ease' is applied after, looks nicer */ float *BlurBaseOperation::make_dist_fac_inverse(float rad, int size, int falloff) diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.h b/source/blender/compositor/operations/COM_BlurBaseOperation.h index 052a525ef2c..e97dd4d766d 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.h +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.h @@ -27,6 +27,10 @@ #define MAX_GAUSSTAB_RADIUS 30000 +#ifdef __SSE2__ +# include <emmintrin.h> +#endif + class BlurBaseOperation : public NodeOperation, public QualityStepHelper { private: @@ -34,6 +38,9 @@ protected: BlurBaseOperation(DataType data_type); float *make_gausstab(float rad, int size); +#ifdef __SSE2__ + __m128 *convert_gausstab_sse(const float *gaustab, float rad, int size); +#endif float *make_dist_fac_inverse(float rad, int size, int falloff); void updateSize(); diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp index d08924ca4ef..0aefba3bb7c 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp @@ -31,6 +31,9 @@ extern "C" { GaussianXBlurOperation::GaussianXBlurOperation() : BlurBaseOperation(COM_DT_COLOR) { this->m_gausstab = NULL; +#ifdef __SSE2__ + this->m_gausstab_sse = NULL; +#endif this->m_filtersize = 0; } @@ -54,8 +57,14 @@ void GaussianXBlurOperation::initExecution() if (this->m_sizeavailable) { float rad = max_ff(m_size * m_data.sizex, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); - + + /* TODO(sergey): De-duplicate with the case below and Y blur. */ this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); +#ifdef __SSE2__ + this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, + rad, + m_filtersize); +#endif } } @@ -65,8 +74,13 @@ void GaussianXBlurOperation::updateGauss() updateSize(); float rad = max_ff(m_size * m_data.sizex, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); - + this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); +#ifdef __SSE2__ + this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, + rad, + m_filtersize); +#endif } } @@ -88,12 +102,25 @@ void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *d int step = getStep(); int offsetadd = getOffsetAdd(); int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth); + +#ifdef __SSE2__ + __m128 accum_r = _mm_load_ps(color_accum); + for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax; nx += step, index += step) { + __m128 reg_a = _mm_load_ps(&buffer[bufferindex]); + reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]); + accum_r = _mm_add_ps(accum_r, reg_a); + multiplier_accum += this->m_gausstab[index]; + bufferindex += offsetadd; + } + _mm_store_ps(color_accum, accum_r); +#else for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax; nx += step, index += step) { const float multiplier = this->m_gausstab[index]; madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier); multiplier_accum += multiplier; bufferindex += offsetadd; } +#endif mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum); } @@ -105,6 +132,12 @@ void GaussianXBlurOperation::deinitExecution() MEM_freeN(this->m_gausstab); this->m_gausstab = NULL; } +#ifdef __SSE2__ + if (this->m_gausstab_sse) { + MEM_freeN(this->m_gausstab_sse); + this->m_gausstab_sse = NULL; + } +#endif deinitMutex(); } diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h index 6442f214138..e391320a007 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h @@ -28,6 +28,9 @@ class GaussianXBlurOperation : public BlurBaseOperation { private: float *m_gausstab; +#ifdef __SSE2__ + __m128 *m_gausstab_sse; +#endif int m_filtersize; void updateGauss(); public: diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp index 8216b79372f..a05a1ab6a23 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp @@ -31,6 +31,9 @@ extern "C" { GaussianYBlurOperation::GaussianYBlurOperation() : BlurBaseOperation(COM_DT_COLOR) { this->m_gausstab = NULL; +#ifdef __SSE2__ + this->m_gausstab_sse = NULL; +#endif this->m_filtersize = 0; } @@ -54,8 +57,13 @@ void GaussianYBlurOperation::initExecution() if (this->m_sizeavailable) { float rad = max_ff(m_size * m_data.sizey, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); - + this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); +#ifdef __SSE2__ + this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, + rad, + m_filtersize); +#endif } } @@ -65,8 +73,13 @@ void GaussianYBlurOperation::updateGauss() updateSize(); float rad = max_ff(m_size * m_data.sizey, 0.0f); m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); - + this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); +#ifdef __SSE2__ + this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, + rad, + m_filtersize); +#endif } } @@ -88,6 +101,20 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d int index; int step = getStep(); const int bufferIndexx = ((xmin - bufferstartx) * 4); + +#ifdef __SSE2__ + __m128 accum_r = _mm_load_ps(color_accum); + for (int ny = ymin; ny < ymax; ny += step) { + index = (ny - y) + this->m_filtersize; + int bufferindex = bufferIndexx + ((ny - bufferstarty) * 4 * bufferwidth); + const float multiplier = this->m_gausstab[index]; + __m128 reg_a = _mm_load_ps(&buffer[bufferindex]); + reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]); + accum_r = _mm_add_ps(accum_r, reg_a); + multiplier_accum += multiplier; + } + _mm_store_ps(color_accum, accum_r); +#else for (int ny = ymin; ny < ymax; ny += step) { index = (ny - y) + this->m_filtersize; int bufferindex = bufferIndexx + ((ny - bufferstarty) * 4 * bufferwidth); @@ -95,6 +122,7 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier); multiplier_accum += multiplier; } +#endif mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum); } @@ -106,6 +134,12 @@ void GaussianYBlurOperation::deinitExecution() MEM_freeN(this->m_gausstab); this->m_gausstab = NULL; } +#ifdef __SSE2__ + if (this->m_gausstab_sse) { + MEM_freeN(this->m_gausstab_sse); + this->m_gausstab_sse = NULL; + } +#endif deinitMutex(); } diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h index 16503360de2..22b6562077d 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h @@ -28,6 +28,9 @@ class GaussianYBlurOperation : public BlurBaseOperation { private: float *m_gausstab; +#ifdef __SSE2__ + __m128 *m_gausstab_sse; +#endif int m_filtersize; void updateGauss(); public: |