speedup for fast gauss blue (approx 10% - 15%)

- get the image width and height once rather then calculating on every access (was doing min/max subtract). - use unsigned int's - faster for looping.
author: Campbell Barton <ideasman42@gmail.com> 2012-06-16 13:52:38 +0400
committer: Campbell Barton <ideasman42@gmail.com> 2012-06-16 13:52:38 +0400
commit: 2f29f8d18656e9c8796b68671a60812d0cffcb70 (patch)
tree: 73418dba2888b792df0c272699fe391a2bb9062b /source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
parent: 250e919b7c1fa3c70925c87d625fa5e0f2d298ab (diff)
1 files changed, 25 insertions, 19 deletions
diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
index 7830eef829c..48cfbeb36f8 100644
--- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
@@ -20,6 +20,8 @@
  *		Monique Dewanchand
  */
 
+#include <limits.h>
+
 #include "COM_FastGaussianBlurOperation.h"
 #include "MEM_guardedalloc.h"
 #include "BLI_utildefines.h"
@@ -74,7 +76,7 @@ void FastGaussianBlurOperation::deinitExecution()
 		delete this->iirgaus;
 		this->iirgaus = NULL;
 	}
-	BlurBaseOperation::deinitMutex();	
+	BlurBaseOperation::deinitMutex();
 }
 
 void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
@@ -84,7 +86,7 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m
 		MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers);
 		MemoryBuffer *copy = newBuf->duplicate();
 		updateSize(memoryBuffers);
-		
+
 		int c;
 		sx = data->sizex * this->size / 2.0f;
 		sy = data->sizey * this->size / 2.0f;
@@ -109,11 +111,14 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m
 	return iirgaus;
 }
 
-void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy)
+void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy)
 {
 	double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
 	double *X, *Y, *W;
-	int i, x, y, sz;
+	const unsigned int src_width = src->getWidth();
+	const unsigned int src_height = src->getHeight();
+	unsigned int x, y, sz;
+	unsigned int i;
 	float *buffer = src->getBuffer();
 	
 	// <0.5 not valid, though can have a possibly useful sort of sharpening effect
@@ -123,8 +128,8 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch
 	
 	// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
 	//     so just skiping blur along faulty direction if src's def is below that limit!
-	if (src->getWidth() < 3) xy &= ~(int) 1;
-	if (src->getHeight() < 3) xy &= ~(int) 2;
+	if (src_width < 3) xy &= ~(int) 1;
+	if (src_height < 3) xy &= ~(int) 2;
 	if (xy < 1) return;
 	
 	// see "Recursive Gabor Filtering" by Young/VanVliet
@@ -178,33 +183,34 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch
 	Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2];     \
 	Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1];   \
 	Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
-	for (i = L - 4; i >= 0; i--) {                                                      \
+	/* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */   \
+	for (i = L - 4; i != UINT_MAX; i--) {                                               \
 		Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3];   \
 	}                                                                                   \
 } (void)0
 	
 	// intermediate buffers
-	sz = MAX2(src->getWidth(), src->getHeight());
+	sz = MAX2(src_width, src_height);
 	X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
 	Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
 	W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
 	if (xy & 1) {   // H
-		for (y = 0; y < src->getHeight(); ++y) {
-			const int yx = y * src->getWidth();
-			for (x = 0; x < src->getWidth(); ++x)
+		for (y = 0; y < src_height; ++y) {
+			const int yx = y * src_width;
+			for (x = 0; x < src_width; ++x)
 				X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan];
-			YVV(src->getWidth());
-			for (x = 0; x < src->getWidth(); ++x)
+			YVV(src_width);
+			for (x = 0; x < src_width; ++x)
 				buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x];
 		}
 	}
 	if (xy & 2) {   // V
-		for (x = 0; x < src->getWidth(); ++x) {
-			for (y = 0; y < src->getHeight(); ++y)
-				X[y] = buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan];
-			YVV(src->getHeight());
-			for (y = 0; y < src->getHeight(); ++y)
-				buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
+		for (x = 0; x < src_width; ++x) {
+			for (y = 0; y < src_height; ++y)
+				X[y] = buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan];
+			YVV(src_height);
+			for (y = 0; y < src_height; ++y)
+				buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
 		}
 	}
author	Campbell Barton <ideasman42@gmail.com>	2012-06-16 13:52:38 +0400
committer	Campbell Barton <ideasman42@gmail.com>	2012-06-16 13:52:38 +0400
commit	2f29f8d18656e9c8796b68671a60812d0cffcb70 (patch)
tree	73418dba2888b792df0c272699fe391a2bb9062b /source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
parent	250e919b7c1fa3c70925c87d625fa5e0f2d298ab (diff)