From a00e63a542788ffe4e01515a09a1b55c087cb865 Mon Sep 17 00:00:00 2001
From: Campbell Barton <ideasman42@gmail.com>
Date: Tue, 11 Nov 2008 01:13:05 +0000
Subject: bicubic_interpolation function was re-calculating a variable it didn't
 need to - (was calling 32 pow()'s per pixel, now only 8 - approx 3-4x speedup
 on my system).

---
 source/blender/imbuf/intern/imageprocess.c | 55 ++++++++++++++++++------------
 1 file changed, 34 insertions(+), 21 deletions(-)

(limited to 'source/blender/imbuf')
diff --git a/source/blender/imbuf/intern/imageprocess.c b/source/blender/imbuf/intern/imageprocess.c
index 30e688ebe29..d7f1ab4419d 100644
--- a/source/blender/imbuf/intern/imageprocess.c
+++ b/source/blender/imbuf/intern/imageprocess.c
@@ -93,16 +93,14 @@ void IMB_convert_rgba_to_abgr(struct ImBuf *ibuf)
 */
 /* function assumes out to be zero'ed, only does RGBA */
 static float P(float k){
-	float aux;
-	aux=(float)(1.0f/6.0f)*( pow( MAX2(k+2.0f,0) , 3.0f ) - 4.0f * pow( MAX2(k+1.0f,0) , 3.0f ) + 6.0f * pow( MAX2(k,0) , 3.0f ) - 4.0f * pow( MAX2(k-1.0f,0) , 3.0f));
-	return aux ;
+	return (float)(1.0f/6.0f)*( pow( MAX2(k+2.0f,0) , 3.0f ) - 4.0f * pow( MAX2(k+1.0f,0) , 3.0f ) + 6.0f * pow( MAX2(k,0) , 3.0f ) - 4.0f * pow( MAX2(k-1.0f,0) , 3.0f));
 }
 
 void bicubic_interpolation(ImBuf *in, ImBuf *out, float x, float y, int xout, int yout)
 {
 	int i,j,n,m,x1,y1;
 	unsigned char *dataI,*outI;
-	float a,b, outR,outG,outB,outA,*dataF,*outF;
+	float a,b,w,wx,wy[4], outR,outG,outB,outA,*dataF,*outF;
 	int do_rect, do_float;
 
 	if (in == NULL) return;
@@ -120,24 +118,39 @@ void bicubic_interpolation(ImBuf *in, ImBuf *out, float x, float y, int xout, in
 	outG= 0.0f;
 	outB= 0.0f;
 	outA= 0.0f;
+	
+	/* avoid calling multiple times */
+	wy[0] = P(b-(-1));
+	wy[1] = P(b-  0);
+	wy[2] = P(b-  1);
+	wy[3] = P(b-  2);
+	
 	for(n= -1; n<= 2; n++){
-		for(m= -1; m<= 2; m++){
-			x1= i+n;
-			y1= j+m;
-			if (x1>0 && x1 < in->x && y1>0 && y1<in->y) {
-				if (do_float) {
-					dataF= in->rect_float + in->x * y1 * 4 + 4*x1;
-					outR+= dataF[0] * P(n-a) * P(b-m);
-					outG+= dataF[1] * P(n-a) * P(b-m);
-					outB+= dataF[2] * P(n-a) * P(b-m);
-					outA+= dataF[3] * P(n-a) * P(b-m);
-				}
-				if (do_rect) {
-					dataI= (unsigned char*)in->rect + in->x * y1 * 4 + 4*x1;
-					outR+= dataI[0] * P(n-a) * P(b-m);
-					outG+= dataI[1] * P(n-a) * P(b-m);
-					outB+= dataI[2] * P(n-a) * P(b-m);
-					outA+= dataI[3] * P(n-a) * P(b-m);
+		x1= i+n;
+		if (x1>0 && x1 < in->x) {
+			wx = P(n-a);
+			for(m= -1; m<= 2; m++){
+				y1= j+m;
+				if (y1>0 && y1<in->y) {
+					/* normally we could do this */
+					/* w = P(n-a) * P(b-m); */
+					/* except that would call P() 16 times per pixel, therefore pow() 64 times; better to precalculate these */
+					w = wx * wy[m+1];
+					
+					if (do_float) {
+						dataF= in->rect_float + in->x * y1 * 4 + 4*x1;
+						outR+= dataF[0] * w;
+						outG+= dataF[1] * w;
+						outB+= dataF[2] * w;
+						outA+= dataF[3] * w;
+					}
+					if (do_rect) {
+						dataI= (unsigned char*)in->rect + in->x * y1 * 4 + 4*x1;
+						outR+= dataI[0] * w;
+						outG+= dataI[1] * w;
+						outB+= dataI[2] * w;
+						outA+= dataI[3] * w;
+					}
 				}
 			}
 		}
-- 
cgit v1.2.3