De-duplicate EWA filter code between renderer and compositor

The title says it all, move the EWA filter to BLI (currently it's math_interp.c) and use the function from both BI renderer and the compositor. This makes more central place of the algorithm, allowing to have fixes and optimizaitons synchronized across the two usages. This also fixes T41440: Displacement in compositing creates holes Reviewers: campbellbarton, lukastoenne Reviewed By: lukastoenne Maniphest Tasks: T41440 Differential Revision: https://developer.blender.org/D748
author: Sergey Sharybin <sergey.vfx@gmail.com> 2014-08-18 17:37:13 +0400
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2014-08-18 17:38:15 +0400
commit: afa3469288337079cbff2bcecb7142448e961393 (patch)
tree: 572862e514fddd8decbb478ba46e4a3a8e9b4c56 /source/blender
parent: 354b17d2f3ff6e1b1c01fe147c2934103c8abef1 (diff)
4 files changed, 278 insertions, 310 deletions
diff --git a/source/blender/blenlib/BLI_math_interp.h b/source/blender/blenlib/BLI_math_interp.h
index 43ef64214ad..d2ec7b80d86 100644
--- a/source/blender/blenlib/BLI_math_interp.h
+++ b/source/blender/blenlib/BLI_math_interp.h
@@ -45,4 +45,24 @@ void BLI_bilinear_interpolation_fl(const float *buffer, float *output, int width
 void BLI_bilinear_interpolation_char(const unsigned char *buffer, unsigned char *output, int width, int height,
                                      int components, float u, float v);
 
+#define EWA_MAXIDX 255
+extern const float EWA_WTS[EWA_MAXIDX + 1];
+
+typedef void (*ewa_filter_read_pixel_cb) (void *userdata, int x, int y, float result[4]);
+
+void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc);
+
+/* TODO(sergey): Consider making this function inlined, so the pixel read callback
+ * could also be inlined in order to avoid per-pixel function calls.
+ */
+void BLI_ewa_filter(const int width, const int height,
+                    const bool intpol,
+                    const bool use_alpha,
+                    const float uv[2],
+                    const float du[2],
+                    const float dv[2],
+                    ewa_filter_read_pixel_cb read_pixel_cb,
+                    void *customdata,
+                    float result[4]);
+
 #endif  /* __BLI_MATH_INTERP_H__ */
diff --git a/source/blender/blenlib/intern/math_interp.c b/source/blender/blenlib/intern/math_interp.c
index 9af1c8677df..ede45e41443 100644
--- a/source/blender/blenlib/intern/math_interp.c
+++ b/source/blender/blenlib/intern/math_interp.c
@@ -366,3 +366,192 @@ void BLI_bilinear_interpolation_char(const unsigned char *buffer, unsigned char
 {
 	bilinear_interpolation(buffer, NULL, output, NULL, width, height, components, u, v);
 }
+
+/**************************************************************************
+ * Filtering method based on
+ * "Creating raster omnimax images from multiple perspective views using the elliptical weighted average filter"
+ * by Ned Greene and Paul S. Heckbert (1986)
+ ***************************************************************************/
+
+/* table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
+ * used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible */
+#define EWA_MAXIDX 255
+const float EWA_WTS[EWA_MAXIDX + 1] = {
+	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
+	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
+	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
+	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
+	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
+	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
+	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
+	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
+	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
+	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
+	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
+	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
+	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
+	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
+	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
+	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
+	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
+	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
+	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
+	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
+	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
+	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
+	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
+	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
+	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
+	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
+	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
+	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
+	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
+	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
+	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
+	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
+};
+
+static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
+{
+	float ct2 = cosf(th);
+	const float st2 = 1.0f - ct2 * ct2;	/* <- sin(th)^2 */
+	ct2 *= ct2;
+	*A = a2*st2 + b2*ct2;
+	*B = (b2 - a2)*sinf(2.f*th);
+	*C = a2*ct2 + b2*st2;
+	*F = a2*b2;
+}
+
+/* all tests here are done to make sure possible overflows are hopefully minimized */
+void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
+{
+	if (F <= 1e-5f) {	/* use arbitrary major radius, zero minor, infinite eccentricity */
+		*a = sqrtf(A > C ? A : C);
+		*b = 0.f;
+		*ecc = 1e10f;
+		*th = 0.5f * (atan2f(B, A - C) + (float)M_PI);
+	}
+	else {
+		const float AmC = A - C, ApC = A + C, F2 = F*2.f;
+		const float r = sqrtf(AmC * AmC + B * B);
+		float d = ApC - r;
+		*a = (d <= 0.f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
+		d = ApC + r;
+		if (d <= 0.f) {
+			*b = 0.f;
+			*ecc = 1e10f;
+		}
+		else {
+			*b = sqrtf(F2 / d);
+			*ecc = *a / *b;
+		}
+		/* incr theta by 0.5*pi (angle of major axis) */
+		*th = 0.5f * (atan2f(B, AmC) + (float)M_PI);
+	}
+}
+
+void BLI_ewa_filter(const int width, const int height,
+                    const bool intpol,
+                    const bool use_alpha,
+                    const float uv[2],
+                    const float du[2],
+                    const float dv[2],
+                    ewa_filter_read_pixel_cb read_pixel_cb,
+                    void *userdata,
+                    float result[4])
+{
+	/* scaling dxt/dyt by full resolution can cause overflow because of huge A/B/C and esp. F values,
+	 * scaling by aspect ratio alone does the opposite, so try something in between instead... */
+	const float ff2 = (float)width, ff = sqrtf(ff2), q = (float)height / ff;
+	const float Ux = du[0] * ff, Vx = dv[0] * q, Uy = du[1] * ff, Vy = dv[1] * q;
+	float A = Vx * Vx + Vy * Vy;
+	float B = -2.0f * (Ux * Vx + Uy * Vy);
+	float C = Ux * Ux + Uy * Uy;
+	float F = A * C - B * B * 0.25f;
+	float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d;
+	int u, v, u1, u2, v1, v2;
+
+	/* The so-called 'high' quality ewa method simply adds a constant of 1 to both A & C,
+	 * so the ellipse always covers at least some texels. But since the filter is now always larger,
+	 * it also means that everywhere else it's also more blurry then ideally should be the case.
+	 * So instead here the ellipse radii are modified instead whenever either is too low.
+	 * Use a different radius based on interpolation switch, just enough to anti-alias when interpolation is off,
+	 * and slightly larger to make result a bit smoother than bilinear interpolation when interpolation is on
+	 * (minimum values: const float rmin = intpol ? 1.f : 0.5f;) */
+	const float rmin = (intpol ? 1.5625f : 0.765625f)/ff2;
+	BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+	if ((b2 = b * b) < rmin) {
+		if ((a2 = a*a) < rmin) {
+			B = 0.0f;
+			A = C = rmin;
+			F = A * C;
+		}
+		else {
+			b2 = rmin;
+			radangle2imp(a2, b2, th, &A, &B, &C, &F);
+		}
+	}
+
+	ue = ff * sqrtf(C);
+	ve = ff * sqrtf(A);
+	d = (float)(EWA_MAXIDX + 1) / (F * ff2);
+	A *= d;
+	B *= d;
+	C *= d;
+
+	U0 = uv[0] * (float)width;
+	V0 = uv[1] * (float)height;
+	u1 = (int)(floorf(U0 - ue));
+	u2 = (int)(ceilf(U0 + ue));
+	v1 = (int)(floorf(V0 - ve));
+	v2 = (int)(ceilf(V0 + ve));
+
+	/* sane clamping to avoid unnecessarily huge loops */
+	/* note: if eccentricity gets clamped (see above),
+	 * the ue/ve limits can also be lowered accordingly
+	 */
+	if (U0 - (float)u1 > EWA_MAXIDX) u1 = (int)U0 - EWA_MAXIDX;
+	if ((float)u2 - U0 > EWA_MAXIDX) u2 = (int)U0 + EWA_MAXIDX;
+	if (V0 - (float)v1 > EWA_MAXIDX) v1 = (int)V0 - EWA_MAXIDX;
+	if ((float)v2 - V0 > EWA_MAXIDX) v2 = (int)V0 + EWA_MAXIDX;
+
+	/* Early output check for cases the whole region is outside of the buffer. */
+	if ((u2 < 0 || u1 >= width) ||  (v2 < 0 || v1 >= height)) {
+		zero_v4(result);
+		return;
+	}
+
+	U0 -= 0.5f;
+	V0 -= 0.5f;
+	DDQ = 2.0f * A;
+	U = (float)u1 - U0;
+	ac1 = A * (2.0f * U + 1.0f);
+	ac2 = A * U * U;
+	BU = B * U;
+
+	d = 0.0f;
+	zero_v4(result);
+	for (v = v1; v <= v2; ++v) {
+		const float V = (float)v - V0;
+		float DQ = ac1 + B*V;
+		float Q = (C * V + BU) * V + ac2;
+		for (u = u1; u <= u2; ++u) {
+			if (Q < (float)(EWA_MAXIDX + 1)) {
+				float tc[4];
+				const float wt = EWA_WTS[(Q < 0.0f) ? 0 : (unsigned int)Q];
+				read_pixel_cb(userdata, u, v, tc);
+				madd_v3_v3fl(result, tc, wt);
+				result[3] += use_alpha ? tc[3] * wt : 0.0f;
+				d += wt;
+			}
+			Q += DQ;
+			DQ += DDQ;
+		}
+	}
+
+	/* d should hopefully never be zero anymore */
+	d = 1.0f / d;
+	mul_v3_fl(result, d);
+	/* clipping can be ignored if alpha used, texr->ta already includes filtered edge */
+	result[3] = use_alpha ? result[3] * d : 1.0f;
+}
diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.cpp b/source/blender/compositor/intern/COM_MemoryBuffer.cpp
index c1916f4a68f..c59ecced93c 100644
--- a/source/blender/compositor/intern/COM_MemoryBuffer.cpp
+++ b/source/blender/compositor/intern/COM_MemoryBuffer.cpp
@@ -178,162 +178,58 @@ void MemoryBuffer::addPixel(int x, int y, const float color[4])
 	}
 }
 
-
-// table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
-// used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible
-#define EWA_MAXIDX 255
-static const float EWA_WTS[EWA_MAXIDX + 1] = {
-	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
-	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
-	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
-	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
-	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
-	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
-	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
-	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
-	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
-	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
-	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
-	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
-	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
-	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
-	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
-	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
-	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
-	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
-	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
-	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
-	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
-	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
-	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
-	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
-	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
-	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
-	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
-	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
-	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
-	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
-	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
-	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
-};
-
-static void ellipse_bounds(float A, float B, float C, float F, float &xmax, float &ymax)
-{
-	float denom = 4.0f * A * C - B * B;
-	if (denom > 0.0f && A != 0.0f && C != 0.0f) {
-		xmax = sqrtf(F) / (2.0f * A) * (sqrtf(F * (4.0f * A - B * B / C)) + B * B * sqrtf(F / (C * denom)));
-		ymax = sqrtf(F) / (2.0f * C) * (sqrtf(F * (4.0f * C - B * B / A)) + B * B * sqrtf(F / (A * denom)));
+typedef struct ReadEWAData {
+	MemoryBuffer *buffer;
+	PixelSampler sampler;
+	float ufac, vfac;
+} ReadEWAData;
+
+static void read_ewa_pixel_sampled(void *userdata, int x, int y, float result[4])
+{
+	ReadEWAData *data = (ReadEWAData *) userdata;
+	switch (data->sampler) {
+		case COM_PS_NEAREST:
+			data->buffer->read(result, x, y);
+			break;
+		case COM_PS_BILINEAR:
+			data->buffer->readBilinear(result,
+			                           (float)x + data->ufac,
+			                           (float)y + data->vfac);
+			break;
+		case COM_PS_BICUBIC:
+			/* TOOD(sergey): no readBicubic method yet */
+			data->buffer->readBilinear(result,
+			                           (float)x + data->ufac,
+			                           (float)y + data->vfac);
+			break;
+		default:
+			zero_v4(result);
+			break;
 	}
-	else {
-		xmax = 0.0f;
-		ymax = 0.0f;
-	}
-}
-
-static void ellipse_params(float Ux, float Uy, float Vx, float Vy,
-                           float &A, float &B, float &C, float &F, float &umax, float &vmax)
-{
-	A = Vx * Vx + Vy * Vy;
-	B = -2.0f * (Ux * Vx + Uy * Vy);
-	C = Ux * Ux + Uy * Uy;
-	F = A * C - B * B * 0.25f;
-
-	float factor = (F != 0.0f ? (float)(EWA_MAXIDX + 1) / F : 0.0f);
-	A *= factor;
-	B *= factor;
-	C *= factor;
-	F = (float)(EWA_MAXIDX + 1);
-
-	ellipse_bounds(A, B, C, sqrtf(F), umax, vmax);
 }
 
-/**
- * Filtering method based on
- * "Creating raster omnimax images from multiple perspective views using the elliptical weighted average filter"
- * by Ned Greene and Paul S. Heckbert (1986)
- */
 void MemoryBuffer::readEWA(float result[4], const float uv[2], const float derivatives[2][2], PixelSampler sampler)
 {
-	zero_v4(result);
-	int width = this->getWidth(), height = this->getHeight();
-	if (width == 0 || height == 0)
-		return;
-
-	float u = uv[0], v = uv[1];
-	float Ux = derivatives[0][0], Vx = derivatives[1][0], Uy = derivatives[0][1], Vy = derivatives[1][1];
-	float A, B, C, F, ue, ve;
-	ellipse_params(Ux, Uy, Vx, Vy, A, B, C, F, ue, ve);
+	ReadEWAData data;
+	data.buffer = this;
+	data.sampler = sampler;
+	data.ufac = uv[0] - floorf(uv[0]);
+	data.vfac = uv[1] - floorf(uv[1]);
 
-	/* Note: highly eccentric ellipses can lead to large texture space areas to filter!
-	 * This is limited somewhat by the EWA_WTS size in the loop, but a nicer approach
-	 * could be the one found in
-	 * "High Quality Elliptical Texture Filtering on GPU"
-	 * by Pavlos Mavridis and Georgios Papaioannou
-	 * in which the eccentricity of the ellipse is clamped.
-	 */
-
-	int U0 = (int)u;
-	int V0 = (int)v;
-	/* pixel offset for interpolation */
-	float ufac = u - floorf(u), vfac = v - floorf(v);
-	/* filter size */
-	int u1 = (int)(u - ue);
-	int u2 = (int)(u + ue);
-	int v1 = (int)(v - ve);
-	int v2 = (int)(v + ve);
-
-	/* sane clamping to avoid unnecessarily huge loops */
-	/* note: if eccentricity gets clamped (see above),
-	 * the ue/ve limits can also be lowered accordingly
+	int width = this->getWidth(), height = this->getHeight();
+	/* TODO(sergey): Render pipeline uses normalized coordinates and derivatives,
+	 * but compositor uses pixel space. For now let's just divide the values and
+	 * switch compositor to normalized space for EWA later.
 	 */
-	if (U0 - u1 > EWA_MAXIDX) u1 = U0 - EWA_MAXIDX;
-	if (u2 - U0 > EWA_MAXIDX) u2 = U0 + EWA_MAXIDX;
-	if (V0 - v1 > EWA_MAXIDX) v1 = V0 - EWA_MAXIDX;
-	if (v2 - V0 > EWA_MAXIDX) v2 = V0 + EWA_MAXIDX;
-
-	/* Early output check for cases the whole region is outside of the buffer. */
-	if ((u2 < m_rect.xmin || u1 >= m_rect.xmax) ||
-	    (v2 < m_rect.ymin || v1 >= m_rect.ymax))
-	{
-		zero_v4(result);
-		return;
-	}
-
-	/* Clamp sampling rectagle to the buffer dimensions. */
-	u1 = max_ii(u1, m_rect.xmin);
-	u2 = min_ii(u2, m_rect.xmax);
-	v1 = max_ii(v1, m_rect.ymin);
-	v2 = min_ii(v2, m_rect.ymax);
-
-	float DDQ = 2.0f * A;
-	float U = u1 - U0;
-	float ac1 = A * (2.0f * U + 1.0f);
-	float ac2 = A * U * U;
-	float BU = B * U;
-
-	float sum = 0.0f;
-	for (int v = v1; v <= v2; ++v) {
-		float V = v - V0;
-
-		float DQ = ac1 + B * V;
-		float Q = (C * V + BU) * V + ac2;
-		for (int u = u1; u <= u2; ++u) {
-			if (Q < F) {
-				float tc[4];
-				const float wt = EWA_WTS[CLAMPIS((int)Q, 0, EWA_MAXIDX)];
-				switch (sampler) {
-					case COM_PS_NEAREST: read(tc, u, v); break;
-					case COM_PS_BILINEAR: readBilinear(tc, (float)u + ufac, (float)v + vfac); break;
-					case COM_PS_BICUBIC: readBilinear(tc, (float)u + ufac, (float)v + vfac); break; /* XXX no readBicubic method yet */
-					default: zero_v4(tc); break;
-				}
-				madd_v4_v4fl(result, tc, wt);
-				sum += wt;
-			}
-			Q += DQ;
-			DQ += DDQ;
-		}
-	}
-	
-	mul_v4_fl(result, (sum != 0.0f ? 1.0f / sum : 0.0f));
+	float uv_normal[2] = {uv[0] / width, uv[1] / height};
+	float du_normal[2] = {derivatives[0][0] / width, derivatives[0][1] / height};
+	float dv_normal[2] = {derivatives[1][0] / width, derivatives[1][1] / height};
+
+	BLI_ewa_filter(this->getWidth(), this->getHeight(),
+	               false,
+	               true,
+	               uv_normal, du_normal, dv_normal,
+	               read_ewa_pixel_sampled,
+	               &data,
+	               result);
 }
diff --git a/source/blender/render/intern/source/imagetexture.c b/source/blender/render/intern/source/imagetexture.c
index 7d4b70cea15..12701099e18 100644
--- a/source/blender/render/intern/source/imagetexture.c
+++ b/source/blender/render/intern/source/imagetexture.c
@@ -791,176 +791,39 @@ static void area_sample(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata
 	texr->ta = texr->talpha ? texr->ta*xsd : (clip ? cw*xsd : 1.f);
 }
 
-/* table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
- * used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible */
-#define EWA_MAXIDX 255
-static const float EWA_WTS[EWA_MAXIDX + 1] = {
-	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
-	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
-	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
-	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
-	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
-	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
-	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
-	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
-	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
-	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
-	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
-	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
-	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
-	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
-	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
-	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
-	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
-	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
-	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
-	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
-	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
-	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
-	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
-	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
-	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
-	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
-	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
-	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
-	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
-	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
-	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
-	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
-};
-
 /* test if a float value is 'nan'
  * there is a C99 function for this: isnan(), but blender seems to use C90 (according to gcc warns),
  * and may not be supported by other compilers either */
+/* TODO(sergey): Consider using isnan(), it's used in the other areas. */
 #ifndef ISNAN
-#define ISNAN(x) ((x) != (x))
+#  define ISNAN(x) ((x) != (x))
 #endif
-//static int ISNAN(float x) { return (x != x); }
 
-static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
-{
-	float ct2 = cosf(th);
-	const float st2 = 1.0f - ct2 * ct2;	/* <- sin(th)^2 */
-	ct2 *= ct2;
-	*A = a2*st2 + b2*ct2;
-	*B = (b2 - a2)*sinf(2.f*th);
-	*C = a2*ct2 + b2*st2;
-	*F = a2*b2;
-}
+typedef struct ReadEWAData {
+	ImBuf *ibuf;
+	afdata_t *AFD;
+} ReadEWAData;
 
-/* all tests here are done to make sure possible overflows are hopefully minimized */
-static void imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
+static void ewa_read_pixel_cb(void *userdata, int x, int y, float result[4])
 {
-	if (F <= 1e-5f) {	/* use arbitrary major radius, zero minor, infinite eccentricity */
-		*a = sqrtf(A > C ? A : C);
-		*b = 0.f;
-		*ecc = 1e10f;
-		*th = 0.5f*(atan2f(B, A - C) + (float)M_PI);
-	}
-	else {
-		const float AmC = A - C, ApC = A + C, F2 = F*2.f;
-		const float r = sqrtf(AmC*AmC + B*B);
-		float d = ApC - r;
-		*a = (d <= 0.f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
-		d = ApC + r;
-		if (d <= 0.f) {
-			*b = 0.f;
-			*ecc = 1e10f;
-		}
-		else {
-			*b = sqrtf(F2 / d);
-			*ecc = *a / *b;
-		}
-		/* incr theta by 0.5*pi (angle of major axis) */
-		*th = 0.5f*(atan2f(B, AmC) + (float)M_PI);
-	}
+	ReadEWAData *data = (ReadEWAData *) userdata;
+	ibuf_get_color_clip(result, data->ibuf, x, y, data->AFD->extflag);
 }
 
 static void ewa_eval(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata_t *AFD)
 {
-	/* scaling dxt/dyt by full resolution can cause overflow because of huge A/B/C and esp. F values,
-	 * scaling by aspect ratio alone does the opposite, so try something in between instead... */
-	const float ff2 = ibuf->x, ff = sqrtf(ff2), q = ibuf->y / ff;
-	const float Ux = AFD->dxt[0]*ff, Vx = AFD->dxt[1]*q, Uy = AFD->dyt[0]*ff, Vy = AFD->dyt[1]*q;
-	float A = Vx*Vx + Vy*Vy;
-	float B = -2.f*(Ux*Vx + Uy*Vy);
-	float C = Ux*Ux + Uy*Uy;
-	float F = A*C - B*B*0.25f;
-	float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d; /* TXF alpha: cw = 0.f; */
-	int u, v, u1, u2, v1, v2; /* TXF alpha: clip = 0; */
-
-	/* The so-called 'high' quality ewa method simply adds a constant of 1 to both A & C,
-	 * so the ellipse always covers at least some texels. But since the filter is now always larger,
-	 * it also means that everywhere else it's also more blurry then ideally should be the case.
-	 * So instead here the ellipse radii are modified instead whenever either is too low.
-	 * Use a different radius based on interpolation switch, just enough to anti-alias when interpolation is off,
-	 * and slightly larger to make result a bit smoother than bilinear interpolation when interpolation is on
-	 * (minimum values: const float rmin = intpol ? 1.f : 0.5f;) */
-	const float rmin = (AFD->intpol ? 1.5625f : 0.765625f)/ff2;
-	imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
-	if ((b2 = b*b) < rmin) {
-		if ((a2 = a*a) < rmin) {
-			B = 0.f;
-			A = C = rmin;
-			F = A*C;
-		}
-		else {
-			b2 = rmin;
-			radangle2imp(a2, b2, th, &A, &B, &C, &F);
-		}
-	}
-
-	ue = ff*sqrtf(C);
-	ve = ff*sqrtf(A);
-	d = (float)(EWA_MAXIDX + 1) / (F*ff2);
-	A *= d;
-	B *= d;
-	C *= d;
-
-	U0 = fx*ibuf->x;
-	V0 = fy*ibuf->y;
-	u1 = (int)(floorf(U0 - ue));
-	u2 = (int)(ceilf(U0 + ue));
-	v1 = (int)(floorf(V0 - ve));
-	v2 = (int)(ceilf(V0 + ve));
-	U0 -= 0.5f;
-	V0 -= 0.5f;
-	DDQ = 2.f*A;
-	U = u1 - U0;
-	ac1 = A*(2.f*U + 1.f);
-	ac2 = A*U*U;
-	BU = B*U;
+	ReadEWAData data;
+	float uv[2] = {fx, fy};
+	data.ibuf = ibuf;
+	data.AFD = AFD;
+	BLI_ewa_filter(ibuf->x, ibuf->y,
+	               AFD->intpol != 0,
+	               texr->talpha,
+	               uv, AFD->dxt, AFD->dyt,
+	               ewa_read_pixel_cb,
+	               &data,
+	               &texr->tr);
 
-	d = texr->tr = texr->tb = texr->tg = texr->ta = 0.f;
-	for (v=v1; v<=v2; ++v) {
-		const float V = v - V0;
-		float DQ = ac1 + B*V;
-		float Q = (C*V + BU)*V + ac2;
-		for (u=u1; u<=u2; ++u) {
-			if (Q < (float)(EWA_MAXIDX + 1)) {
-				float tc[4];
-				const float wt = EWA_WTS[(Q < 0.f) ? 0 : (unsigned int)Q];
-				/*const int out =*/ ibuf_get_color_clip(tc, ibuf, u, v, AFD->extflag);
-				/* TXF alpha: clip |= out;
-				 * TXF alpha: cw += out ? 0.f : wt; */
-				texr->tr += tc[0]*wt;
-				texr->tg += tc[1]*wt;
-				texr->tb += tc[2]*wt;
-				texr->ta += texr->talpha ? tc[3]*wt : 0.f;
-				d += wt;
-			}
-			Q += DQ;
-			DQ += DDQ;
-		}
-	}
-
-	/* d should hopefully never be zero anymore */
-	d = 1.f/d;
-	texr->tr *= d;
-	texr->tg *= d;
-	texr->tb *= d;
-	/* clipping can be ignored if alpha used, texr->ta already includes filtered edge */
-	texr->ta = texr->talpha ? texr->ta*d : 1.f; /* TXF alpha (clip ? cw*d : 1.f); */
 }
 
 static void feline_eval(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata_t *AFD)
@@ -1304,7 +1167,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 		const float C = Ux*Ux + Uy*Uy;
 		const float F = A*C - B*B*0.25f;
 		float a, b, th, ecc;
-		imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+		BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
 		if (tex->texfilter == TXF_FELINE) {
 			float fProbes;
 			a *= ff;
@@ -1408,7 +1271,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 			const float C = Ux*Ux + Uy*Uy;
 			const float F = A*C - B*B*0.25f;
 			float a, b, th, ecc, fProbes;
-			imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+			BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
 			a *= ff;
 			b *= ff;
 			a = max_ff(a, 1.0f);
author	Sergey Sharybin <sergey.vfx@gmail.com>	2014-08-18 17:37:13 +0400
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2014-08-18 17:38:15 +0400
commit	afa3469288337079cbff2bcecb7142448e961393 (patch)
tree	572862e514fddd8decbb478ba46e4a3a8e9b4c56 /source/blender
parent	354b17d2f3ff6e1b1c01fe147c2934103c8abef1 (diff)