Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2012-06-16 13:52:38 +0400
committerCampbell Barton <ideasman42@gmail.com>2012-06-16 13:52:38 +0400
commit2f29f8d18656e9c8796b68671a60812d0cffcb70 (patch)
tree73418dba2888b792df0c272699fe391a2bb9062b /source/blender/nodes/composite/node_composite_util.c
parent250e919b7c1fa3c70925c87d625fa5e0f2d298ab (diff)
speedup for fast gauss blue (approx 10% - 15%)
- get the image width and height once rather then calculating on every access (was doing min/max subtract). - use unsigned int's - faster for looping.
Diffstat (limited to 'source/blender/nodes/composite/node_composite_util.c')
-rw-r--r--source/blender/nodes/composite/node_composite_util.c129
1 files changed, 68 insertions, 61 deletions
diff --git a/source/blender/nodes/composite/node_composite_util.c b/source/blender/nodes/composite/node_composite_util.c
index afd10d96e99..70788dfe0c8 100644
--- a/source/blender/nodes/composite/node_composite_util.c
+++ b/source/blender/nodes/composite/node_composite_util.c
@@ -32,6 +32,8 @@
#include "node_composite_util.h"
+#include <limits.h>
+
CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc)
{
CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf");
@@ -1300,33 +1302,35 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy)
{
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
double *X, *Y, *W;
- int i, x, y, sz;
+ const unsigned int src_width = src->x;
+ const unsigned int src_height = src->y;
+ unsigned int i, x, y, sz;
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
if (sigma < 0.5f) return;
-
+
if ((xy < 1) || (xy > 3)) xy = 3;
-
+
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
// so just skiping blur along faulty direction if src's def is below that limit!
- if (src->x < 3) xy &= ~(int) 1;
- if (src->y < 3) xy &= ~(int) 2;
+ if (src_width < 3) xy &= ~(int) 1;
+ if (src_height < 3) xy &= ~(int) 2;
if (xy < 1) return;
// see "Recursive Gabor Filtering" by Young/VanVliet
// all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
if (sigma >= 3.556f)
- q = 0.9804f*(sigma - 3.556f) + 2.5091f;
- else // sigma >= 0.5
- q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
- q2 = q*q;
- sc = (1.1668 + q)*(3.203729649 + (2.21566 + q)*q);
+ q = 0.9804f * (sigma - 3.556f) + 2.5091f;
+ else // sigma >= 0.5
+ q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f;
+ q2 = q * q;
+ sc = (1.1668 + q) * (3.203729649 + (2.21566 + q) * q);
// no gabor filtering here, so no complex multiplies, just the regular coefs.
// all negated here, so as not to have to recalc Triggs/Sdika matrix
- cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
- cf[2] = -q2*(3.38246 + 3.0*q)/sc;
+ cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc;
+ cf[2] = -q2 * (3.38246 + 3.0 * q) / sc;
// 0 & 3 unchanged
- cf[3] = q2*q/sc;
+ cf[3] = q2 * q / sc;
cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
// Triggs/Sdika border corrections,
@@ -1336,59 +1340,62 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy)
// but neither seem to be quite the same, result seems to be ok so far anyway.
// Extra scale factor here to not have to do it in filter,
// though maybe this had something to with the precision errors
- sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
- tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
- tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
- tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
- tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
- tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
- tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
- tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
- tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
- tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
-
-#define YVV(L) \
-{ \
- W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0]; \
- W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0]; \
- W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0]; \
- for (i=3; i<L; i++) \
- W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3]; \
- tsu[0] = W[L-1] - X[L-1]; \
- tsu[1] = W[L-2] - X[L-1]; \
- tsu[2] = W[L-3] - X[L-1]; \
- tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1]; \
- tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1]; \
- tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1]; \
- Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2]; \
- Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1]; \
- Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0]; \
- for (i=L-4; i>=0; i--) \
- Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3]; \
+ sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
+ tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
+ tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
+ tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
+ tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
+ tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
+ tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
+ tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
+ tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
+ tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
+
+#define YVV(L) \
+{ \
+ W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0]; \
+ W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0]; \
+ W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0]; \
+ for (i = 3; i < L; i++) { \
+ W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3]; \
+ } \
+ tsu[0] = W[L - 1] - X[L - 1]; \
+ tsu[1] = W[L - 2] - X[L - 1]; \
+ tsu[2] = W[L - 3] - X[L - 1]; \
+ tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1]; \
+ tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1]; \
+ tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1]; \
+ Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \
+ Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \
+ Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
+ /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \
+ for (i = L - 4; i != UINT_MAX; i--) { \
+ Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \
+ } \
} (void)0
// intermediate buffers
- sz = MAX2(src->x, src->y);
- X = MEM_callocN(sz*sizeof(double), "IIR_gauss X buf");
- Y = MEM_callocN(sz*sizeof(double), "IIR_gauss Y buf");
- W = MEM_callocN(sz*sizeof(double), "IIR_gauss W buf");
- if (xy & 1) { // H
- for (y=0; y<src->y; ++y) {
- const int yx = y*src->x;
- for (x=0; x<src->x; ++x)
- X[x] = src->rect[(x + yx)*src->type + chan];
- YVV(src->x);
- for (x=0; x<src->x; ++x)
- src->rect[(x + yx)*src->type + chan] = Y[x];
+ sz = MAX2(src_width, src_height);
+ X = MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
+ Y = MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
+ W = MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
+ if (xy & 1) { // H
+ for (y = 0; y < src_height; ++y) {
+ const int yx = y * src_width;
+ for (x = 0; x < src_width; ++x)
+ X[x] = src->rect[(x + yx) * src->type + chan];
+ YVV(src_width);
+ for (x = 0; x < src_width; ++x)
+ src->rect[(x + yx) * src->type + chan] = Y[x];
}
}
- if (xy & 2) { // V
- for (x=0; x<src->x; ++x) {
- for (y=0; y<src->y; ++y)
- X[y] = src->rect[(x + y*src->x)*src->type + chan];
- YVV(src->y);
- for (y=0; y<src->y; ++y)
- src->rect[(x + y*src->x)*src->type + chan] = Y[y];
+ if (xy & 2) { // V
+ for (x = 0; x < src_width; ++x) {
+ for (y = 0; y < src_height; ++y)
+ X[y] = src->rect[(x + y * src_width) * src->type + chan];
+ YVV(src_height);
+ for (y = 0; y < src_height; ++y)
+ src->rect[(x + y * src_width) * src->type + chan] = Y[y];
}
}