diff options
author | Tianwei Shen <shentianweipku@gmail.com> | 2017-04-15 16:44:48 +0300 |
---|---|---|
committer | Tianwei Shen <shentianweipku@gmail.com> | 2017-04-15 16:44:48 +0300 |
commit | f11ff0e672b8df79cdab85c8bebbd8c36c2c1cff (patch) | |
tree | c80138a7af123907c3eb26fee978644c702795cf /intern/cycles/kernel/kernel_compat_cpu.h | |
parent | 473653f33798b727148e8f6379f31cbba95de479 (diff) | |
parent | 97c9c6a3f321f35f0b58ab167aea97a790c94cfb (diff) |
Merge branch 'master' into soc-2016-multiviewsoc-2016-multiview
Diffstat (limited to 'intern/cycles/kernel/kernel_compat_cpu.h')
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cpu.h | 357 |
1 files changed, 193 insertions, 164 deletions
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index e347a1eca18..21da180bb8e 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -35,12 +35,12 @@ # define __NODES_FEATURES__ NODE_FEATURE_ALL #endif -#include "util_debug.h" -#include "util_math.h" -#include "util_simd.h" -#include "util_half.h" -#include "util_types.h" -#include "util_texture.h" +#include "util/util_debug.h" +#include "util/util_math.h" +#include "util/util_simd.h" +#include "util/util_half.h" +#include "util/util_types.h" +#include "util/util_texture.h" #define ccl_addr_space @@ -87,9 +87,9 @@ template<typename T> struct texture { ccl_always_inline avxf fetch_avxf(const int index) { kernel_assert(index >= 0 && (index+1) < width); - ssef *ssefData = (ssef*)data; - ssef *ssefNodeData = &ssefData[index]; - return _mm256_loadu_ps((float *)ssefNodeData); + ssef *ssef_data = (ssef*)data; + ssef *ssef_node_data = &ssef_data[index]; + return _mm256_loadu_ps((float *)ssef_node_data); } #endif @@ -316,184 +316,213 @@ template<typename T> struct texture_image { return interp_3d_ex(x, y, z, interpolation); } - ccl_always_inline float4 interp_3d_ex(float x, float y, float z, - int interpolation = INTERPOLATION_LINEAR) + ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z) { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - int ix, iy, iz, nix, niy, niz; - - if(interpolation == INTERPOLATION_CLOSEST) { - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + int ix, iy, iz; + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + frac(z*(float)depth, &iz); + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - return read(data[ix + iy*width + iz*width*height]); + } + /* Fall through. */ + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } - else if(interpolation == INTERPOLATION_LINEAR) { - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); + return read(data[ix + iy*width + iz*width*height]); + } - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + float tz = frac(z*(float)depth - 0.5f, &iz); + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; + } + /* Fall through. */ + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } - else { - /* Tricubic b-spline interpolation. */ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); + float4 r; - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); + r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); + r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); + r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); + r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); + r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); + r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); + r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); + return r; + } - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are + * causing stack overflow issue in this function unless it is inlined. + * + * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization + * enabled. + */ +#ifdef __GNUC__ + ccl_always_inline +#else + ccl_never_inline +#endif + float4 interp_3d_ex_tricubic(float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; + } + /* Fall through. */ + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) #define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) #define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); #undef COL_TERM #undef ROW_TERM #undef DATA + } + + ccl_always_inline float4 interp_3d_ex(float x, float y, float z, + int interpolation = INTERPOLATION_LINEAR) + { + if(UNLIKELY(!data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch(interpolation) { + case INTERPOLATION_CLOSEST: + return interp_3d_ex_closest(x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_ex_linear(x, y, z); + default: + return interp_3d_ex_tricubic(x, y, z); } } |