1 files changed, 131 insertions, 19 deletions
diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
index 9231261986d..a6be9254f6f 100644
--- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
@@ -29,6 +29,7 @@
 FastGaussianBlurOperation::FastGaussianBlurOperation() : BlurBaseOperation(COM_DT_COLOR)
 {
 	this->m_iirgaus = NULL;
+	this->m_chunksize = 256;  // margin threshold: getDAI() selects the whole image once a blur margin reaches this value
 }
 
 void FastGaussianBlurOperation::executePixel(float output[4], int x, int y, void *data)
@@ -37,22 +38,56 @@ void FastGaussianBlurOperation::executePixel(float output[4], int x, int y, void
 	newData->read(output, x, y);
 }
 
+// Calculate the depending area of interest (input region) for tile `rect`.
+// The margin per axis is sizex/sizey * m_size * 2, and never less than 1.
+// If either margin reaches m_chunksize, `output` is set to the whole image
+// and false is returned. Otherwise `output` is `rect` grown by margin + 1
+// on every side (it is not clamped to the image here) and true is returned.
+bool FastGaussianBlurOperation::getDAI(rcti *rect, rcti *output)
+{
+	// Margin is doubled: sizex * m_size alone seemed to leave errors at tile
+	// boundaries (cause not identified) -- NOTE(review): confirm 2x suffices.
+	int sx = this->m_data->sizex * this->m_size * 2;
+	if (sx < 1)
+		sx = 1;
+	int sy = this->m_data->sizey * this->m_size * 2;
+	if (sy < 1)
+		sy = 1;
+
+	if (sx >= this->m_chunksize || sy >= this->m_chunksize) {  // margin too large for tiling: take the full image
+		output->xmin = 0;
+		output->xmax = this->getWidth();
+		output->ymin = 0;
+		output->ymax = this->getHeight();
+		return false;
+	}
+	else {  // tile mode: requested rect plus the margin (and one extra unit) per side
+		output->xmin = rect->xmin - sx - 1;
+		output->xmax = rect->xmax + sx + 1;
+		output->ymin = rect->ymin - sy - 1;
+		output->ymax = rect->ymax + sy + 1;
+		return true;
+	}
+}
+
 bool FastGaussianBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
 {
 	rcti newInput;
-	rcti sizeInput;
-	sizeInput.xmin = 0;
-	sizeInput.ymin = 0;
-	sizeInput.xmax = 5;
-	sizeInput.ymax = 5;
 	
-	NodeOperation *operation = this->getInputOperation(1);
-	if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
-		return true;
+	if (!this->m_sizeavailable) {
+		rcti sizeInput;
+		sizeInput.xmin = 0;
+		sizeInput.ymin = 0;
+		sizeInput.xmax = 5;
+		sizeInput.ymax = 5;
+		NodeOperation *operation = this->getInputOperation(1);
+		if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
+			return true;
+		}
 	}
-	else {
-		if (this->m_iirgaus) {
-			return false;
+	{
+		if (this->m_sizeavailable) {
+			getDAI(input, &newInput);
 		}
 		else {
 			newInput.xmin = 0;
@@ -81,6 +116,7 @@ void FastGaussianBlurOperation::deinitExecution()
 
 void *FastGaussianBlurOperation::initializeTileData(rcti *rect)
 {
+#if 0
 	lockMutex();
 	if (!this->m_iirgaus) {
 		MemoryBuffer *newBuf = (MemoryBuffer *)this->m_inputProgram->initializeTileData(rect);
@@ -109,8 +145,68 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect)
 	}
 	unlockMutex();
 	return this->m_iirgaus;
+#else
+
+	lockMutex();
+	if (this->m_iirgaus) {
+		// if this->m_iirgaus is set, we don't do tile rendering, so
+		// we can return the already calculated cache
+		unlockMutex();
+		return this->m_iirgaus;
+	}
+	updateSize();
+	rcti dai;
+	bool use_tiles = getDAI(rect, &dai);
+	if (use_tiles) {
+		unlockMutex();
+	}
+
+	MemoryBuffer *buffer = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL);
+	rcti *buf_rect = buffer->getRect();
+
+	dai.xmin = max(dai.xmin, buf_rect->xmin);
+	dai.xmax = min(dai.xmax, buf_rect->xmax);
+	dai.ymin = max(dai.ymin, buf_rect->ymin);
+	dai.ymax = min(dai.ymax, buf_rect->ymax);
+
+	MemoryBuffer *tile = new MemoryBuffer(NULL, &dai);
+	tile->copyContentFrom(buffer);
+
+	int c;
+	float sx = this->m_data->sizex * this->m_size / 2.0f;
+	float sy = this->m_data->sizey * this->m_size / 2.0f;
+
+	if ((sx == sy) && (sx > 0.f)) {
+		for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
+			IIR_gauss(tile, sx, c, 3);
+	}
+	else {
+		if (sx > 0.0f) {
+			for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
+				IIR_gauss(tile, sx, c, 1);
+		}
+		if (sy > 0.0f) {
+			for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
+				IIR_gauss(tile, sy, c, 2);
+		}
+	}
+	if (!use_tiles) {
+		this->m_iirgaus = tile;
+		unlockMutex();
+	}
+	return tile;
+#endif
 }
 
+void FastGaussianBlurOperation::deinitializeTileData(rcti *rect, void *data)
+{
+	if (!this->m_iirgaus && data) {  // nothing is freed here once a buffer is cached in m_iirgaus; `rect` is unused
+		MemoryBuffer *tile = (MemoryBuffer *)data;  // `data` is the pointer returned by initializeTileData()
+		delete tile;
+	}
+}
+
+
 void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy)
 {
 	double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
@@ -195,22 +291,38 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsign
 	Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
 	W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
 	if (xy & 1) {   // H
+		int offset;
 		for (y = 0; y < src_height; ++y) {
 			const int yx = y * src_width;
-			for (x = 0; x < src_width; ++x)
-				X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan];
+			offset = yx * COM_NUMBER_OF_CHANNELS + chan;
+			for (x = 0; x < src_width; ++x) {
+				X[x] = buffer[offset];
+				offset += COM_NUMBER_OF_CHANNELS;
+			}
 			YVV(src_width);
-			for (x = 0; x < src_width; ++x)
-				buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x];
+			offset = yx * COM_NUMBER_OF_CHANNELS + chan;
+			for (x = 0; x < src_width; ++x) {
+				buffer[offset] = Y[x];
+				offset += COM_NUMBER_OF_CHANNELS;
+			}
 		}
 	}
 	if (xy & 2) {   // V
+		int offset;
+		const int add = src_width * COM_NUMBER_OF_CHANNELS;
+
 		for (x = 0; x < src_width; ++x) {
-			for (y = 0; y < src_height; ++y)
-				X[y] = buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan];
+			offset = x * COM_NUMBER_OF_CHANNELS + chan;
+			for (y = 0; y < src_height; ++y) {
+				X[y] = buffer[offset];
+				offset += add;
+			}
 			YVV(src_height);
-			for (y = 0; y < src_height; ++y)
-				buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
+			offset = x * COM_NUMBER_OF_CHANNELS + chan;
+			for (y = 0; y < src_height; ++y) {
+				buffer[offset] = Y[y];
+				offset += add;
+			}
 		}
 	}