From 53896d4235d883f39d4b02119419c462ab65a3a7 Mon Sep 17 00:00:00 2001
From: Sergey Sharybin
Date: Tue, 7 Feb 2017 13:05:19 +0100
Subject: Fix T49253: Cycles blackbody is wrong on AVX2 CPU on Windows

Seems to be a bug in the optimizer, but managed to reshuffle the code in a
way which should also give some speedup.
---
 intern/cycles/kernel/svm/svm_math_util.h | 53 +++++++++++++++++---------------
 1 file changed, 29 insertions(+), 24 deletions(-)

(limited to 'intern')

diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 01547b60014..a7f15de7325 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -134,32 +134,37 @@ ccl_device float3 svm_math_blackbody_color(float t) {
 		{ 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f },
 	};
 
-	if(t >= 12000.0f)
+	int i;
+	if(t >= 12000.0f) {
 		return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
+	}
+	else if(t >= 6365.0f) {
+		i = 5;
+	}
+	else if(t >= 3315.0f) {
+		i = 4;
+	}
+	else if(t >= 1902.0f) {
+		i = 3;
+	}
+	else if(t >= 1449.0f) {
+		i = 2;
+	}
+	else if(t >= 1167.0f) {
+		i = 1;
+	}
+	else if(t >= 965.0f) {
+		i = 0;
+	}
+	else {
+		/* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
+		return make_float3(4.70366907f, 0.0f, 0.0f);
+	}
 
-	/* Define a macro to reduce stack usage for nvcc */
-#define MAKE_BB_RGB(i) make_float3(\
-	rc[i][0] / t + rc[i][1] * t + rc[i][2],\
-	gc[i][0] / t + gc[i][1] * t + gc[i][2],\
-	((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3])
-
-	if(t >= 6365.0f)
-		return MAKE_BB_RGB(5);
-	if(t >= 3315.0f)
-		return MAKE_BB_RGB(4);
-	if(t >= 1902.0f)
-		return MAKE_BB_RGB(3);
-	if(t >= 1449.0f)
-		return MAKE_BB_RGB(2);
-	if(t >= 1167.0f)
-		return MAKE_BB_RGB(1);
-	if(t >= 965.0f)
-		return MAKE_BB_RGB(0);
-
-#undef MAKE_BB_RGB
-
-	/* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
-	return make_float3(4.70366907f, 0.0f, 0.0f);
+	const float t_inv = 1.0f / t;
+	return make_float3(rc[i][0] * t_inv + rc[i][1] * t + rc[i][2],
+	                   gc[i][0] * t_inv + gc[i][1] * t + gc[i][2],
+	                   ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]);
 }
 
 ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
-- 
cgit v1.2.3