Cycles: Use more accurate implementation of erf() and erfinv()

This functions are orders of magnitude more accurate than the old ones, and they're around the same complexity to compute.
author: Sergey Sharybin <sergey.vfx@gmail.com> 2014-10-03 16:27:08 +0400
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2014-10-03 16:28:44 +0400
commit: 1e4d99368bba53edb6f903036b49edda92f27e31 (patch)
tree: 033e43b12b9419c9ef0848e6cd67817064b74c02
parent: 72f557d34e21a5fe341de0a88797334810a3c66c (diff)
1 files changed, 22 insertions, 123 deletions
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index a0c59e6cbc0..c6aed1205e0 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -35,139 +35,38 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Approximate erf and erfinv implementations
+/* Approximate erf and erfinv implementations.
+ * Implementation comes stright from the wikipedia:
  *
- * Adapted from code (C) Copyright John Maddock 2006.
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
-
-ccl_device float approx_erff_impl(float z)
-{
-	float result;
-
-	if(z < 0.5f) {
-		if(z < 1e-10f) {
-			if(z == 0) {
-				result = 0;
-			}
-			else {
-				float c = 0.0033791670f;
-				result = z * 1.125f + z * c;
-			}
-		}
-		else {
-			float Y = 1.044948577f;
-
-			float zz = z * z;
-			float num = (((-0.007727583f * zz) + -0.050999073f)*zz + -0.338165134f)*zz + 0.083430589f;
-			float denom = (((0.000370900f * zz) + 0.008585719f)*zz + 0.087522260f)*zz + 0.455004033f;
-			result = z * (Y + num / denom);
-		}
-	}
-	else if(z < 2.5f) {
-		if(z < 1.5f) {
-			float Y = 0.4059357643f;
-			float fz = z - 0.5f;
-
-			float num = (((0.088890036f * fz) + 0.191003695f)*fz + 0.178114665f)*fz + -0.098090592f;
-			float denom = (((0.123850974f * fz) + 0.578052804f)*fz + 1.426280048f)*fz + 1.847590709f;
-
-			result = Y + num / denom;
-			result *= expf(-z * z) / z;
-		}
-		else  {
-			float Y = 0.506728172f;
-			float fz = z - 1.5f;
-			float num = (((0.017567943f * fz) + 0.043948189f)*fz + 0.038654037f)*fz + -0.024350047f;
-			float denom = (((0.325732924f * fz) + 0.982403709f)*fz + 1.539914949f)*fz + 1;
-
-			result = Y + num / denom;
-			result *= expf(-z * z) / z;
-		}
-
-		result = 1 - result;
-	}
-	else {
-		result = 1;
-	}
-
-	return result;
-}
+ *   http://en.wikipedia.org/wiki/Error_function
+ *
+ * Some constants are baked into the code.
+ */
 
-ccl_device float approx_erff(float z)
+ccl_device_inline float approx_erff(float x)
 {
 	float s = 1.0f;
-
-	if(z < 0.0f) {
+	if(x < 0.0f) {
 		s = -1.0f;
-		z = -z;
-	}
-
-	return s * approx_erff_impl(z);
-}
-
-ccl_device float approx_erfinvf_impl(float p, float q)
-{
-	float result = 0;
-
-	if(p <= 0.5f) {
-		float Y = 0.089131474f;
-		float g = p * (p + 10);
-		float num = (((-0.012692614f * p) + 0.033480662f)*p + -0.008368748f)*p + -0.000508781f;
-		float denom = (((1.562215583f * p) + -1.565745582f)*p + -0.970005043f)*p + 1.0f;
-		float r = num / denom;
-		result = g * Y + g * r;
+		x = -x;
 	}
-	else if(q >= 0.25f) {
-		float Y = 2.249481201f;
-		float g = sqrtf(-2 * logf(q));
-		float xs = q - 0.25f;
-		float num = (((17.644729840f * xs) + 8.370503283f)*xs + 0.105264680f)*xs + -0.202433508f;
-		float denom = (((-28.660818049f * xs) + 3.971343795f)*xs + 6.242641248f)*xs + 1.0f;
-		float r = num / denom;
-		result = g / (Y + r);
+	/* Such a clamp doesn't give much distortion to the output value
+	 * and gives quite a few of the speedup.
+	 */
+	if(x > 3.0f) {
+		return s;
 	}
-	else {
-		float x = sqrtf(-logf(q));
-
-		if(x < 3) {
-			float Y = 0.807220458f;
-			float xs = x - 1.125f;
-			float num = (((0.387079738f * xs) + 0.117030156f)*xs + -0.163794047f)*xs + -0.131102781f;
-			float denom = (((4.778465929f * xs) + 5.381683457f)*xs + 3.466254072f)*xs + 1.0f;
-			float R = num / denom;
-			result = Y * x + R * x;
-		}
-		else {
-			float Y = 0.939955711f;
-			float xs = x - 3;
-			float num = (((0.009508047f * xs) + 0.018557330f)*xs + -0.002224265f)*xs + -0.035035378f;
-			float denom = (((0.220091105f * xs) + 0.762059164f)*xs + 1.365334981f)*xs + 1.0f;
-			float R = num / denom;
-			result = Y * x + R * x;
-		}
-	}
-
-	return result;
+	float t = 1.0f / (1.0f + 0.47047f*x);
+	return s * (1.0f -
+	            t*(0.3480242f + t*(-0.0958798f + t*0.7478556f)) * expf(-x*x));
 }
 
-ccl_device float approx_erfinvf(float z)
+ccl_device_inline float approx_erfinvf(float x)
 {
-	float p, q, s;
-
-	if(z < 0) {
-	  p = -z;
-	  q = 1 - p;
-	  s = -1;
-	}
-	else {
-	  p = z;
-	  q = 1 - z;
-	  s = 1;
-	}
-
-	return s * approx_erfinvf_impl(p, q);
+	float ln1_x2 = logf(1.0f - x*x);
+	float term = 4.546884979448f + ln1_x2 * 0.5f;
+	return copysignf(1.0f, x) *
+	       sqrtf(sqrtf(term*term - ln1_x2 * 7.142230224076f) - term);
 }
 
 /* Beckmann and GGX microfacet importance sampling from:
author	Sergey Sharybin <sergey.vfx@gmail.com>	2014-10-03 16:27:08 +0400
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2014-10-03 16:28:44 +0400
commit	1e4d99368bba53edb6f903036b49edda92f27e31 (patch)
tree	033e43b12b9419c9ef0848e6cd67817064b74c02
parent	72f557d34e21a5fe341de0a88797334810a3c66c (diff)