diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-01-30 16:25:24 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-01-30 23:49:41 +0300 |
commit | d632ef7c66399bef4686673ae344cbad2a887c2a (patch) | |
tree | ec4648d2d9339d942bc87ebfda34ebf00dad60c8 /intern/cycles | |
parent | dc1043dda0552af72396fec15dccd9d7eefee803 (diff) |
Cycles: Use fast math functions in hair BSDF
The precision of the fast functions seems to be sufficient here, and
since the code makes heavy use of inverse trigonometric functions,
this change gives a few percent speedup on Victor's hair.
The test files from the ctests storage show no meaningful
difference; hair on Victor is all below 4% absolute error, and only
a few pixels exceed 1% absolute difference.
In any case, let it be as it is for now, so that we have the
fast math file in the sources for its further evaluation and possible
use in other areas as well.
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/closure/bsdf_hair.h | 75 |
1 files changed, 43 insertions, 32 deletions
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index 3d7bdab4ff2..5642f6f268c 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -76,12 +76,12 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con float3 locy = normalize(I - Tg * Iz); //float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); float omega_in_z = dot(Tg, omega_in); float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - safe_acosf(omega_in_z); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); float cosphi_i = dot(omega_in_y, locy); if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { @@ -89,17 +89,19 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con return make_float3(*pdf, *pdf, *pdf); } - float phi_i = safe_acosf(cosphi_i) / roughness2; + float roughness1_inv = 1.0f / roughness1; + float roughness2_inv = 1.0f / roughness2; + float phi_i = fast_acosf(cosphi_i) * roughness2_inv; phi_i = fabsf(phi_i) < M_PI_F ? 
phi_i : M_PI_F; - float costheta_i = cosf(theta_i); + float costheta_i = fast_cosf(theta_i); - float a_R = atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); - float b_R = atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); float theta_h = (theta_i + theta_r) * 0.5f; float t = theta_h - offset; - float phi_pdf = cosf(phi_i * 0.5f) * 0.25f / roughness2; + float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i); *pdf = phi_pdf * theta_pdf; @@ -132,24 +134,25 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, float3 locy = normalize(I - Tg * Iz); //float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); float omega_in_z = dot(Tg, omega_in); float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - safe_acosf(omega_in_z); - float phi_i = safe_acosf(dot(omega_in_y, locy)); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); + float phi_i = fast_acosf(dot(omega_in_y, locy)); if(M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; return make_float3(*pdf, *pdf, *pdf); } - float costheta_i = cosf(theta_i); + float costheta_i = fast_cosf(theta_i); - float a_TT = atan2f(((M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float b_TT = atan2f(((-M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float c_TT = 2 * atan2f(M_PI_2_F / roughness2, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); float theta_h 
= (theta_i + theta_r) / 2; float t = theta_h - offset; @@ -177,27 +180,31 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float a_R = atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); - float b_R = atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); float t = roughness1 * tanf(randu * (a_R - b_R) + b_R); float theta_h = t + offset; float theta_i = 2 * theta_h - theta_r; - float costheta_i = cosf(theta_i); - float sintheta_i = sinf(theta_i); + + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2; - float phi_pdf = cosf(phi * 0.5f) * 0.25f / roughness2; + float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i); - *omega_in =(cosf(phi) * costheta_i) * locy - - (sinf(phi) * costheta_i) * locx + - ( sintheta_i) * Tg; + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in =(cosphi * costheta_i) * locy - + (sinphi * costheta_i) * locx + + ( sintheta_i) * Tg; //differentials - TODO: find a better approximation for the reflective bounce #ifdef __RAY_DIFFERENTIALS__ @@ -228,27 +235,31 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float a_TT = atan2f(((M_PI_2_F + theta_r)/2 - offset) / 
roughness1, 1.0f); - float b_TT = atan2f(((-M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float c_TT = 2 * atan2f(M_PI_2_F / roughness2, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT); float theta_h = t + offset; float theta_i = 2 * theta_h - theta_r; - float costheta_i = cosf(theta_i); - float sintheta_i = sinf(theta_i); + + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); float p = roughness2 * tanf(c_TT * (randv - 0.5f)); float phi = p + M_PI_F; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_TT - b_TT) * costheta_i); float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); - *omega_in =(cosf(phi) * costheta_i) * locy - - (sinf(phi) * costheta_i) * locx + - ( sintheta_i) * Tg; + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in =(cosphi * costheta_i) * locy - + (sinphi * costheta_i) * locx + + ( sintheta_i) * Tg; //differentials - TODO: find a better approximation for the transmission bounce #ifdef __RAY_DIFFERENTIALS__ |