4 files changed, 278 insertions, 310 deletions
diff --git a/source/blender/blenlib/BLI_math_interp.h b/source/blender/blenlib/BLI_math_interp.h
index 43ef64214ad..d2ec7b80d86 100644
--- a/source/blender/blenlib/BLI_math_interp.h
+++ b/source/blender/blenlib/BLI_math_interp.h
@@ -45,4 +45,24 @@ void BLI_bilinear_interpolation_fl(const float *buffer, float *output, int width
 void BLI_bilinear_interpolation_char(const unsigned char *buffer, unsigned char *output, int width, int height,
                                      int components, float u, float v);
 
+#define EWA_MAXIDX 255
+extern const float EWA_WTS[EWA_MAXIDX + 1];
+
+typedef void (*ewa_filter_read_pixel_cb) (void *userdata, int x, int y, float result[4]);
+
+void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc);
+
+/* TODO(sergey): Consider making this function inlined, so the pixel read callback
+ * could also be inlined in order to avoid per-pixel function calls.
+ */
+void BLI_ewa_filter(const int width, const int height,
+                    const bool intpol,
+                    const bool use_alpha,
+                    const float uv[2],
+                    const float du[2],
+                    const float dv[2],
+                    ewa_filter_read_pixel_cb read_pixel_cb,
+                    void *customdata,
+                    float result[4]);
+
 #endif  /* __BLI_MATH_INTERP_H__ */
diff --git a/source/blender/blenlib/intern/math_interp.c b/source/blender/blenlib/intern/math_interp.c
index 9af1c8677df..ede45e41443 100644
--- a/source/blender/blenlib/intern/math_interp.c
+++ b/source/blender/blenlib/intern/math_interp.c
@@ -366,3 +366,192 @@ void BLI_bilinear_interpolation_char(const unsigned char *buffer, unsigned char
 {
 	bilinear_interpolation(buffer, NULL, output, NULL, width, height, components, u, v);
 }
+
+/**************************************************************************
+ * Filtering method based on
+ * "Creating raster omnimax images from multiple perspective views using the elliptical weighted average filter"
+ * by Ned Greene and Paul S. Heckbert (1986)
+ ***************************************************************************/
+
+/* table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
+ * used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible */
+#define EWA_MAXIDX 255
+const float EWA_WTS[EWA_MAXIDX + 1] = {
+	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
+	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
+	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
+	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
+	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
+	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
+	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
+	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
+	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
+	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
+	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
+	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
+	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
+	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
+	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
+	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
+	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
+	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
+	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
+	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
+	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
+	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
+	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
+	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
+	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
+	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
+	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
+	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
+	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
+	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
+	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
+	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
+};
+
+static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
+{
+	float ct2 = cosf(th);
+	const float st2 = 1.0f - ct2 * ct2;	/* <- sin(th)^2 */
+	ct2 *= ct2;
+	*A = a2*st2 + b2*ct2;
+	*B = (b2 - a2)*sinf(2.f*th);
+	*C = a2*ct2 + b2*st2;
+	*F = a2*b2;
+}
+
+/* all tests here are done to make sure possible overflows are hopefully minimized */
+void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
+{
+	if (F <= 1e-5f) {	/* use arbitrary major radius, zero minor, infinite eccentricity */
+		*a = sqrtf(A > C ? A : C);
+		*b = 0.f;
+		*ecc = 1e10f;
+		*th = 0.5f * (atan2f(B, A - C) + (float)M_PI);
+	}
+	else {
+		const float AmC = A - C, ApC = A + C, F2 = F*2.f;
+		const float r = sqrtf(AmC * AmC + B * B);
+		float d = ApC - r;
+		*a = (d <= 0.f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
+		d = ApC + r;
+		if (d <= 0.f) {
+			*b = 0.f;
+			*ecc = 1e10f;
+		}
+		else {
+			*b = sqrtf(F2 / d);
+			*ecc = *a / *b;
+		}
+		/* incr theta by 0.5*pi (angle of major axis) */
+		*th = 0.5f * (atan2f(B, AmC) + (float)M_PI);
+	}
+}
+
+void BLI_ewa_filter(const int width, const int height,
+                    const bool intpol,
+                    const bool use_alpha,
+                    const float uv[2],
+                    const float du[2],
+                    const float dv[2],
+                    ewa_filter_read_pixel_cb read_pixel_cb,
+                    void *userdata,
+                    float result[4])
+{
+	/* scaling dxt/dyt by full resolution can cause overflow because of huge A/B/C and esp. F values,
+	 * scaling by aspect ratio alone does the opposite, so try something in between instead... */
+	const float ff2 = (float)width, ff = sqrtf(ff2), q = (float)height / ff;
+	const float Ux = du[0] * ff, Vx = dv[0] * q, Uy = du[1] * ff, Vy = dv[1] * q;
+	float A = Vx * Vx + Vy * Vy;
+	float B = -2.0f * (Ux * Vx + Uy * Vy);
+	float C = Ux * Ux + Uy * Uy;
+	float F = A * C - B * B * 0.25f;
+	float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d;
+	int u, v, u1, u2, v1, v2;
+
+	/* The so-called 'high' quality ewa method simply adds a constant of 1 to both A & C,
+	 * so the ellipse always covers at least some texels. But since the filter is now always larger,
+	 * it also means that everywhere else it's also more blurry then ideally should be the case.
+	 * So instead here the ellipse radii are modified instead whenever either is too low.
+	 * Use a different radius based on interpolation switch, just enough to anti-alias when interpolation is off,
+	 * and slightly larger to make result a bit smoother than bilinear interpolation when interpolation is on
+	 * (minimum values: const float rmin = intpol ? 1.f : 0.5f;) */
+	const float rmin = (intpol ? 1.5625f : 0.765625f)/ff2;
+	BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+	if ((b2 = b * b) < rmin) {
+		if ((a2 = a*a) < rmin) {
+			B = 0.0f;
+			A = C = rmin;
+			F = A * C;
+		}
+		else {
+			b2 = rmin;
+			radangle2imp(a2, b2, th, &A, &B, &C, &F);
+		}
+	}
+
+	ue = ff * sqrtf(C);
+	ve = ff * sqrtf(A);
+	d = (float)(EWA_MAXIDX + 1) / (F * ff2);
+	A *= d;
+	B *= d;
+	C *= d;
+
+	U0 = uv[0] * (float)width;
+	V0 = uv[1] * (float)height;
+	u1 = (int)(floorf(U0 - ue));
+	u2 = (int)(ceilf(U0 + ue));
+	v1 = (int)(floorf(V0 - ve));
+	v2 = (int)(ceilf(V0 + ve));
+
+	/* sane clamping to avoid unnecessarily huge loops */
+	/* note: if eccentricity gets clamped (see above),
+	 * the ue/ve limits can also be lowered accordingly
+	 */
+	if (U0 - (float)u1 > EWA_MAXIDX) u1 = (int)U0 - EWA_MAXIDX;
+	if ((float)u2 - U0 > EWA_MAXIDX) u2 = (int)U0 + EWA_MAXIDX;
+	if (V0 - (float)v1 > EWA_MAXIDX) v1 = (int)V0 - EWA_MAXIDX;
+	if ((float)v2 - V0 > EWA_MAXIDX) v2 = (int)V0 + EWA_MAXIDX;
+
+	/* Early output check for cases the whole region is outside of the buffer. */
+	if ((u2 < 0 || u1 >= width) ||  (v2 < 0 || v1 >= height)) {
+		zero_v4(result);
+		return;
+	}
+
+	U0 -= 0.5f;
+	V0 -= 0.5f;
+	DDQ = 2.0f * A;
+	U = (float)u1 - U0;
+	ac1 = A * (2.0f * U + 1.0f);
+	ac2 = A * U * U;
+	BU = B * U;
+
+	d = 0.0f;
+	zero_v4(result);
+	for (v = v1; v <= v2; ++v) {
+		const float V = (float)v - V0;
+		float DQ = ac1 + B*V;
+		float Q = (C * V + BU) * V + ac2;
+		for (u = u1; u <= u2; ++u) {
+			if (Q < (float)(EWA_MAXIDX + 1)) {
+				float tc[4];
+				const float wt = EWA_WTS[(Q < 0.0f) ? 0 : (unsigned int)Q];
+				read_pixel_cb(userdata, u, v, tc);
+				madd_v3_v3fl(result, tc, wt);
+				result[3] += use_alpha ? tc[3] * wt : 0.0f;
+				d += wt;
+			}
+			Q += DQ;
+			DQ += DDQ;
+		}
+	}
+
+	/* d should hopefully never be zero anymore */
+	d = 1.0f / d;
+	mul_v3_fl(result, d);
+	/* clipping can be ignored if alpha used, texr->ta already includes filtered edge */
+	result[3] = use_alpha ? result[3] * d : 1.0f;
+}
diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.cpp b/source/blender/compositor/intern/COM_MemoryBuffer.cpp
index c1916f4a68f..c59ecced93c 100644
--- a/source/blender/compositor/intern/COM_MemoryBuffer.cpp
+++ b/source/blender/compositor/intern/COM_MemoryBuffer.cpp
@@ -178,162 +178,58 @@ void MemoryBuffer::addPixel(int x, int y, const float color[4])
 	}
 }
 
-
-// table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
-// used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible
-#define EWA_MAXIDX 255
-static const float EWA_WTS[EWA_MAXIDX + 1] = {
-	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
-	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
-	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
-	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
-	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
-	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
-	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
-	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
-	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
-	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
-	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
-	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
-	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
-	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
-	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
-	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
-	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
-	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
-	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
-	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
-	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
-	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
-	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
-	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
-	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
-	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
-	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
-	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
-	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
-	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
-	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
-	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
-};
-
-static void ellipse_bounds(float A, float B, float C, float F, float &xmax, float &ymax)
-{
-	float denom = 4.0f * A * C - B * B;
-	if (denom > 0.0f && A != 0.0f && C != 0.0f) {
-		xmax = sqrtf(F) / (2.0f * A) * (sqrtf(F * (4.0f * A - B * B / C)) + B * B * sqrtf(F / (C * denom)));
-		ymax = sqrtf(F) / (2.0f * C) * (sqrtf(F * (4.0f * C - B * B / A)) + B * B * sqrtf(F / (A * denom)));
+typedef struct ReadEWAData {
+	MemoryBuffer *buffer;
+	PixelSampler sampler;
+	float ufac, vfac;
+} ReadEWAData;
+
+static void read_ewa_pixel_sampled(void *userdata, int x, int y, float result[4])
+{
+	ReadEWAData *data = (ReadEWAData *) userdata;
+	switch (data->sampler) {
+		case COM_PS_NEAREST:
+			data->buffer->read(result, x, y);
+			break;
+		case COM_PS_BILINEAR:
+			data->buffer->readBilinear(result,
+			                           (float)x + data->ufac,
+			                           (float)y + data->vfac);
+			break;
+		case COM_PS_BICUBIC:
+			/* TOOD(sergey): no readBicubic method yet */
+			data->buffer->readBilinear(result,
+			                           (float)x + data->ufac,
+			                           (float)y + data->vfac);
+			break;
+		default:
+			zero_v4(result);
+			break;
 	}
-	else {
-		xmax = 0.0f;
-		ymax = 0.0f;
-	}
-}
-
-static void ellipse_params(float Ux, float Uy, float Vx, float Vy,
-                           float &A, float &B, float &C, float &F, float &umax, float &vmax)
-{
-	A = Vx * Vx + Vy * Vy;
-	B = -2.0f * (Ux * Vx + Uy * Vy);
-	C = Ux * Ux + Uy * Uy;
-	F = A * C - B * B * 0.25f;
-
-	float factor = (F != 0.0f ? (float)(EWA_MAXIDX + 1) / F : 0.0f);
-	A *= factor;
-	B *= factor;
-	C *= factor;
-	F = (float)(EWA_MAXIDX + 1);
-
-	ellipse_bounds(A, B, C, sqrtf(F), umax, vmax);
 }
 
-/**
- * Filtering method based on
- * "Creating raster omnimax images from multiple perspective views using the elliptical weighted average filter"
- * by Ned Greene and Paul S. Heckbert (1986)
- */
 void MemoryBuffer::readEWA(float result[4], const float uv[2], const float derivatives[2][2], PixelSampler sampler)
 {
-	zero_v4(result);
-	int width = this->getWidth(), height = this->getHeight();
-	if (width == 0 || height == 0)
-		return;
-
-	float u = uv[0], v = uv[1];
-	float Ux = derivatives[0][0], Vx = derivatives[1][0], Uy = derivatives[0][1], Vy = derivatives[1][1];
-	float A, B, C, F, ue, ve;
-	ellipse_params(Ux, Uy, Vx, Vy, A, B, C, F, ue, ve);
+	ReadEWAData data;
+	data.buffer = this;
+	data.sampler = sampler;
+	data.ufac = uv[0] - floorf(uv[0]);
+	data.vfac = uv[1] - floorf(uv[1]);
 
-	/* Note: highly eccentric ellipses can lead to large texture space areas to filter!
-	 * This is limited somewhat by the EWA_WTS size in the loop, but a nicer approach
-	 * could be the one found in
-	 * "High Quality Elliptical Texture Filtering on GPU"
-	 * by Pavlos Mavridis and Georgios Papaioannou
-	 * in which the eccentricity of the ellipse is clamped.
-	 */
-
-	int U0 = (int)u;
-	int V0 = (int)v;
-	/* pixel offset for interpolation */
-	float ufac = u - floorf(u), vfac = v - floorf(v);
-	/* filter size */
-	int u1 = (int)(u - ue);
-	int u2 = (int)(u + ue);
-	int v1 = (int)(v - ve);
-	int v2 = (int)(v + ve);
-
-	/* sane clamping to avoid unnecessarily huge loops */
-	/* note: if eccentricity gets clamped (see above),
-	 * the ue/ve limits can also be lowered accordingly
+	int width = this->getWidth(), height = this->getHeight();
+	/* TODO(sergey): Render pipeline uses normalized coordinates and derivatives,
+	 * but compositor uses pixel space. For now let's just divide the values and
+	 * switch compositor to normalized space for EWA later.
 	 */
-	if (U0 - u1 > EWA_MAXIDX) u1 = U0 - EWA_MAXIDX;
-	if (u2 - U0 > EWA_MAXIDX) u2 = U0 + EWA_MAXIDX;
-	if (V0 - v1 > EWA_MAXIDX) v1 = V0 - EWA_MAXIDX;
-	if (v2 - V0 > EWA_MAXIDX) v2 = V0 + EWA_MAXIDX;
-
-	/* Early output check for cases the whole region is outside of the buffer. */
-	if ((u2 < m_rect.xmin || u1 >= m_rect.xmax) ||
-	    (v2 < m_rect.ymin || v1 >= m_rect.ymax))
-	{
-		zero_v4(result);
-		return;
-	}
-
-	/* Clamp sampling rectagle to the buffer dimensions. */
-	u1 = max_ii(u1, m_rect.xmin);
-	u2 = min_ii(u2, m_rect.xmax);
-	v1 = max_ii(v1, m_rect.ymin);
-	v2 = min_ii(v2, m_rect.ymax);
-
-	float DDQ = 2.0f * A;
-	float U = u1 - U0;
-	float ac1 = A * (2.0f * U + 1.0f);
-	float ac2 = A * U * U;
-	float BU = B * U;
-
-	float sum = 0.0f;
-	for (int v = v1; v <= v2; ++v) {
-		float V = v - V0;
-
-		float DQ = ac1 + B * V;
-		float Q = (C * V + BU) * V + ac2;
-		for (int u = u1; u <= u2; ++u) {
-			if (Q < F) {
-				float tc[4];
-				const float wt = EWA_WTS[CLAMPIS((int)Q, 0, EWA_MAXIDX)];
-				switch (sampler) {
-					case COM_PS_NEAREST: read(tc, u, v); break;
-					case COM_PS_BILINEAR: readBilinear(tc, (float)u + ufac, (float)v + vfac); break;
-					case COM_PS_BICUBIC: readBilinear(tc, (float)u + ufac, (float)v + vfac); break; /* XXX no readBicubic method yet */
-					default: zero_v4(tc); break;
-				}
-				madd_v4_v4fl(result, tc, wt);
-				sum += wt;
-			}
-			Q += DQ;
-			DQ += DDQ;
-		}
-	}
-	
-	mul_v4_fl(result, (sum != 0.0f ? 1.0f / sum : 0.0f));
+	float uv_normal[2] = {uv[0] / width, uv[1] / height};
+	float du_normal[2] = {derivatives[0][0] / width, derivatives[0][1] / height};
+	float dv_normal[2] = {derivatives[1][0] / width, derivatives[1][1] / height};
+
+	BLI_ewa_filter(this->getWidth(), this->getHeight(),
+	               false,
+	               true,
+	               uv_normal, du_normal, dv_normal,
+	               read_ewa_pixel_sampled,
+	               &data,
+	               result);
 }
diff --git a/source/blender/render/intern/source/imagetexture.c b/source/blender/render/intern/source/imagetexture.c
index 7d4b70cea15..12701099e18 100644
--- a/source/blender/render/intern/source/imagetexture.c
+++ b/source/blender/render/intern/source/imagetexture.c
@@ -791,176 +791,39 @@ static void area_sample(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata
 	texr->ta = texr->talpha ? texr->ta*xsd : (clip ? cw*xsd : 1.f);
 }
 
-/* table of (exp(ar) - exp(a)) / (1 - exp(a)) for r in range [0, 1] and a = -2
- * used instead of actual gaussian, otherwise at high texture magnifications circular artifacts are visible */
-#define EWA_MAXIDX 255
-static const float EWA_WTS[EWA_MAXIDX + 1] = {
-	1.f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f, 0.938216f, 0.929664f,
-	0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f, 0.879734f, 0.871638f, 0.863605f,
-	0.855636f, 0.847728f, 0.839883f, 0.832098f, 0.824375f, 0.816712f, 0.809108f, 0.801564f,
-	0.794079f, 0.786653f, 0.779284f, 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f,
-	0.736267f, 0.729292f, 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f,
-	0.68197f, 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
-	0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f, 0.588906f,
-	0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f, 0.549084f, 0.543572f,
-	0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f, 0.511389f, 0.506171f, 0.500994f,
-	0.495857f, 0.490761f, 0.485704f, 0.480687f, 0.475709f, 0.470769f, 0.465869f, 0.461006f,
-	0.456182f, 0.451395f, 0.446646f, 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f,
-	0.418919f, 0.414424f, 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f,
-	0.383923f, 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
-	0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f, 0.323939f,
-	0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f, 0.298272f, 0.294719f,
-	0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f, 0.273976f, 0.270613f, 0.267276f,
-	0.263965f, 0.26068f, 0.257421f, 0.254187f, 0.250979f, 0.247795f, 0.244636f, 0.241502f,
-	0.238393f, 0.235308f, 0.232246f, 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f,
-	0.214375f, 0.211478f, 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f,
-	0.191819f, 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
-	0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f, 0.153157f,
-	0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f, 0.136613f, 0.134323f,
-	0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f, 0.120954f, 0.118786f, 0.116635f,
-	0.114501f, 0.112384f, 0.110283f, 0.108199f, 0.106131f, 0.104079f, 0.102043f, 0.100023f,
-	0.0980186f, 0.09603f, 0.094057f, 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f,
-	0.0825384f, 0.0806708f, 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f,
-	0.0679997f, 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
-	0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f, 0.0430805f,
-	0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f, 0.0324175f, 0.0309415f,
-	0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f, 0.0223242f, 0.020927f, 0.0195408f,
-	0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f, 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f,
-	0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f, 0.f
-};
-
 /* test if a float value is 'nan'
  * there is a C99 function for this: isnan(), but blender seems to use C90 (according to gcc warns),
  * and may not be supported by other compilers either */
+/* TODO(sergey): Consider using isnan(), it's used in the other areas. */
 #ifndef ISNAN
-#define ISNAN(x) ((x) != (x))
+#  define ISNAN(x) ((x) != (x))
 #endif
-//static int ISNAN(float x) { return (x != x); }
 
-static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
-{
-	float ct2 = cosf(th);
-	const float st2 = 1.0f - ct2 * ct2;	/* <- sin(th)^2 */
-	ct2 *= ct2;
-	*A = a2*st2 + b2*ct2;
-	*B = (b2 - a2)*sinf(2.f*th);
-	*C = a2*ct2 + b2*st2;
-	*F = a2*b2;
-}
+typedef struct ReadEWAData {
+	ImBuf *ibuf;
+	afdata_t *AFD;
+} ReadEWAData;
 
-/* all tests here are done to make sure possible overflows are hopefully minimized */
-static void imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
+static void ewa_read_pixel_cb(void *userdata, int x, int y, float result[4])
 {
-	if (F <= 1e-5f) {	/* use arbitrary major radius, zero minor, infinite eccentricity */
-		*a = sqrtf(A > C ? A : C);
-		*b = 0.f;
-		*ecc = 1e10f;
-		*th = 0.5f*(atan2f(B, A - C) + (float)M_PI);
-	}
-	else {
-		const float AmC = A - C, ApC = A + C, F2 = F*2.f;
-		const float r = sqrtf(AmC*AmC + B*B);
-		float d = ApC - r;
-		*a = (d <= 0.f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
-		d = ApC + r;
-		if (d <= 0.f) {
-			*b = 0.f;
-			*ecc = 1e10f;
-		}
-		else {
-			*b = sqrtf(F2 / d);
-			*ecc = *a / *b;
-		}
-		/* incr theta by 0.5*pi (angle of major axis) */
-		*th = 0.5f*(atan2f(B, AmC) + (float)M_PI);
-	}
+	ReadEWAData *data = (ReadEWAData *) userdata;
+	ibuf_get_color_clip(result, data->ibuf, x, y, data->AFD->extflag);
 }
 
 static void ewa_eval(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata_t *AFD)
 {
-	/* scaling dxt/dyt by full resolution can cause overflow because of huge A/B/C and esp. F values,
-	 * scaling by aspect ratio alone does the opposite, so try something in between instead... */
-	const float ff2 = ibuf->x, ff = sqrtf(ff2), q = ibuf->y / ff;
-	const float Ux = AFD->dxt[0]*ff, Vx = AFD->dxt[1]*q, Uy = AFD->dyt[0]*ff, Vy = AFD->dyt[1]*q;
-	float A = Vx*Vx + Vy*Vy;
-	float B = -2.f*(Ux*Vx + Uy*Vy);
-	float C = Ux*Ux + Uy*Uy;
-	float F = A*C - B*B*0.25f;
-	float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d; /* TXF alpha: cw = 0.f; */
-	int u, v, u1, u2, v1, v2; /* TXF alpha: clip = 0; */
-
-	/* The so-called 'high' quality ewa method simply adds a constant of 1 to both A & C,
-	 * so the ellipse always covers at least some texels. But since the filter is now always larger,
-	 * it also means that everywhere else it's also more blurry then ideally should be the case.
-	 * So instead here the ellipse radii are modified instead whenever either is too low.
-	 * Use a different radius based on interpolation switch, just enough to anti-alias when interpolation is off,
-	 * and slightly larger to make result a bit smoother than bilinear interpolation when interpolation is on
-	 * (minimum values: const float rmin = intpol ? 1.f : 0.5f;) */
-	const float rmin = (AFD->intpol ? 1.5625f : 0.765625f)/ff2;
-	imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
-	if ((b2 = b*b) < rmin) {
-		if ((a2 = a*a) < rmin) {
-			B = 0.f;
-			A = C = rmin;
-			F = A*C;
-		}
-		else {
-			b2 = rmin;
-			radangle2imp(a2, b2, th, &A, &B, &C, &F);
-		}
-	}
-
-	ue = ff*sqrtf(C);
-	ve = ff*sqrtf(A);
-	d = (float)(EWA_MAXIDX + 1) / (F*ff2);
-	A *= d;
-	B *= d;
-	C *= d;
-
-	U0 = fx*ibuf->x;
-	V0 = fy*ibuf->y;
-	u1 = (int)(floorf(U0 - ue));
-	u2 = (int)(ceilf(U0 + ue));
-	v1 = (int)(floorf(V0 - ve));
-	v2 = (int)(ceilf(V0 + ve));
-	U0 -= 0.5f;
-	V0 -= 0.5f;
-	DDQ = 2.f*A;
-	U = u1 - U0;
-	ac1 = A*(2.f*U + 1.f);
-	ac2 = A*U*U;
-	BU = B*U;
+	ReadEWAData data;
+	float uv[2] = {fx, fy};
+	data.ibuf = ibuf;
+	data.AFD = AFD;
+	BLI_ewa_filter(ibuf->x, ibuf->y,
+	               AFD->intpol != 0,
+	               texr->talpha,
+	               uv, AFD->dxt, AFD->dyt,
+	               ewa_read_pixel_cb,
+	               &data,
+	               &texr->tr);
 
-	d = texr->tr = texr->tb = texr->tg = texr->ta = 0.f;
-	for (v=v1; v<=v2; ++v) {
-		const float V = v - V0;
-		float DQ = ac1 + B*V;
-		float Q = (C*V + BU)*V + ac2;
-		for (u=u1; u<=u2; ++u) {
-			if (Q < (float)(EWA_MAXIDX + 1)) {
-				float tc[4];
-				const float wt = EWA_WTS[(Q < 0.f) ? 0 : (unsigned int)Q];
-				/*const int out =*/ ibuf_get_color_clip(tc, ibuf, u, v, AFD->extflag);
-				/* TXF alpha: clip |= out;
-				 * TXF alpha: cw += out ? 0.f : wt; */
-				texr->tr += tc[0]*wt;
-				texr->tg += tc[1]*wt;
-				texr->tb += tc[2]*wt;
-				texr->ta += texr->talpha ? tc[3]*wt : 0.f;
-				d += wt;
-			}
-			Q += DQ;
-			DQ += DDQ;
-		}
-	}
-
-	/* d should hopefully never be zero anymore */
-	d = 1.f/d;
-	texr->tr *= d;
-	texr->tg *= d;
-	texr->tb *= d;
-	/* clipping can be ignored if alpha used, texr->ta already includes filtered edge */
-	texr->ta = texr->talpha ? texr->ta*d : 1.f; /* TXF alpha (clip ? cw*d : 1.f); */
 }
 
 static void feline_eval(TexResult *texr, ImBuf *ibuf, float fx, float fy, afdata_t *AFD)
@@ -1304,7 +1167,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 		const float C = Ux*Ux + Uy*Uy;
 		const float F = A*C - B*B*0.25f;
 		float a, b, th, ecc;
-		imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+		BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
 		if (tex->texfilter == TXF_FELINE) {
 			float fProbes;
 			a *= ff;
@@ -1408,7 +1271,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 			const float C = Ux*Ux + Uy*Uy;
 			const float F = A*C - B*B*0.25f;
 			float a, b, th, ecc, fProbes;
-			imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
+			BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
 			a *= ff;
 			b *= ff;
 			a = max_ff(a, 1.0f);