diff options
author | Campbell Barton <ideasman42@gmail.com> | 2012-06-16 13:52:38 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2012-06-16 13:52:38 +0400 |
commit | 2f29f8d18656e9c8796b68671a60812d0cffcb70 (patch) | |
tree | 73418dba2888b792df0c272699fe391a2bb9062b /source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp | |
parent | 250e919b7c1fa3c70925c87d625fa5e0f2d298ab (diff) |
speedup for fast gauss blur (approx 10% - 15%)
- get the image width and height once rather than calculating on every access (was doing min/max subtract).
- use unsigned ints - faster for looping.
Diffstat (limited to 'source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp')
-rw-r--r-- | source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp | 44 |
1 files changed, 25 insertions, 19 deletions
diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp index 7830eef829c..48cfbeb36f8 100644 --- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp @@ -20,6 +20,8 @@ * Monique Dewanchand */ +#include <limits.h> + #include "COM_FastGaussianBlurOperation.h" #include "MEM_guardedalloc.h" #include "BLI_utildefines.h" @@ -74,7 +76,7 @@ void FastGaussianBlurOperation::deinitExecution() delete this->iirgaus; this->iirgaus = NULL; } - BlurBaseOperation::deinitMutex(); + BlurBaseOperation::deinitMutex(); } void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers) @@ -84,7 +86,7 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers); MemoryBuffer *copy = newBuf->duplicate(); updateSize(memoryBuffers); - + int c; sx = data->sizex * this->size / 2.0f; sy = data->sizey * this->size / 2.0f; @@ -109,11 +111,14 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m return iirgaus; } -void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy) +void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy) { double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3]; double *X, *Y, *W; - int i, x, y, sz; + const unsigned int src_width = src->getWidth(); + const unsigned int src_height = src->getHeight(); + unsigned int x, y, sz; + unsigned int i; float *buffer = src->getBuffer(); // <0.5 not valid, though can have a possibly useful sort of sharpening effect @@ -123,8 +128,8 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch // XXX The YVV macro defined below explicitly expects sources of at least 
3x3 pixels, // so just skiping blur along faulty direction if src's def is below that limit! - if (src->getWidth() < 3) xy &= ~(int) 1; - if (src->getHeight() < 3) xy &= ~(int) 2; + if (src_width < 3) xy &= ~(int) 1; + if (src_height < 3) xy &= ~(int) 2; if (xy < 1) return; // see "Recursive Gabor Filtering" by Young/VanVliet @@ -178,33 +183,34 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \ Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \ Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \ - for (i = L - 4; i >= 0; i--) { \ + /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \ + for (i = L - 4; i != UINT_MAX; i--) { \ Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \ } \ } (void)0 // intermediate buffers - sz = MAX2(src->getWidth(), src->getHeight()); + sz = MAX2(src_width, src_height); X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf"); Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf"); W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf"); if (xy & 1) { // H - for (y = 0; y < src->getHeight(); ++y) { - const int yx = y * src->getWidth(); - for (x = 0; x < src->getWidth(); ++x) + for (y = 0; y < src_height; ++y) { + const int yx = y * src_width; + for (x = 0; x < src_width; ++x) X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan]; - YVV(src->getWidth()); - for (x = 0; x < src->getWidth(); ++x) + YVV(src_width); + for (x = 0; x < src_width; ++x) buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x]; } } if (xy & 2) { // V - for (x = 0; x < src->getWidth(); ++x) { - for (y = 0; y < src->getHeight(); ++y) - X[y] = buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan]; - YVV(src->getHeight()); - for (y = 0; y < src->getHeight(); ++y) - buffer[(x + y * 
src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan] = Y[y]; + for (x = 0; x < src_width; ++x) { + for (y = 0; y < src_height; ++y) + X[y] = buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan]; + YVV(src_height); + for (y = 0; y < src_height; ++y) + buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan] = Y[y]; } } |