From 23098cda9936d785988b689ee69e58e900f17cb2 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 6 Oct 2017 21:47:41 +0200 Subject: Code refactor: make texture code more consistent between devices. * Use common TextureInfo struct for all devices, except CUDA fermi. * Move image sampling code to kernels/*/kernel_*_image.h files. * Use arrays for data textures on Fermi too, so device_vector works. --- intern/cycles/kernel/kernel_compat_cpu.h | 449 +------------------------------ 1 file changed, 1 insertion(+), 448 deletions(-) (limited to 'intern/cycles/kernel/kernel_compat_cpu.h') diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 93934ee6b38..6f63c8f77a2 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -74,7 +74,7 @@ CCL_NAMESPACE_BEGIN * pointer lookup. */ template struct texture { - ccl_always_inline T fetch(int index) + ccl_always_inline const T& fetch(int index) { kernel_assert(index >= 0 && index < width); return data[index]; @@ -112,449 +112,6 @@ template struct texture { int width; }; -template struct texture_image { -#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ - u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ - u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void)0 - - ccl_always_inline float4 read(float4 r) - { - return r; - } - - ccl_always_inline float4 read(uchar4 r) - { - float f = 1.0f/255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - ccl_always_inline float4 read(uchar r) - { - float f = r*(1.0f/255.0f); - return make_float4(f, f, f, 1.0f); - } - - ccl_always_inline float4 read(float r) - { - /* TODO(dingto): Optimize this, so interpolation - * happens on float instead of float4 */ - return make_float4(r, r, r, 1.0f); - } - - ccl_always_inline float4 read(half4 r) - { - return half4_to_float4(r); - } - - ccl_always_inline float4 read(half r) - { - float f = half_to_float(r); - return make_float4(f, f, f, 1.0f); - } - - ccl_always_inline int wrap_periodic(int x, int width) - { - x %= width; - if(x < 0) - x += width; - return x; - } - - ccl_always_inline int wrap_clamp(int x, int width) - { - return clamp(x, 0, width-1); - } - - ccl_always_inline float frac(float x, int *ix) - { - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; - } - - ccl_always_inline float4 interp(float x, float y) - { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - int ix, iy, nix, niy; - - if(interpolation == INTERPOLATION_CLOSEST) { - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[ix + iy*width]); - } - else if(interpolation == INTERPOLATION_LINEAR) { - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); - r += (1.0f - ty)*tx*read(data[nix + iy*width]); - r += ty*(1.0f - tx)*read(data[ix + niy*width]); - r += ty*tx*read(data[nix + niy*width]); - - return r; - } - else { - /* Bicubic b-spline interpolation. */ - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - int pix, piy, nnix, nniy; - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - float u[4], v[4]; - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y) (read(data[xc[x] + yc[y]])) -#define TERM(col) \ - (v[col] * (u[0] * DATA(0, col) + \ - u[1] * DATA(1, col) + \ - u[2] * DATA(2, col) + \ - u[3] * DATA(3, col))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - /* Actual interpolation. */ - return TERM(0) + TERM(1) + TERM(2) + TERM(3); - -#undef TERM -#undef DATA - } - } - - ccl_always_inline float4 interp_3d(float x, float y, float z) - { - return interp_3d_ex(x, y, z, interpolation); - } - - ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z) - { - int ix, iy, iz; - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - return read(data[ix + iy*width + iz*width*height]); - } - - ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z) - { - int ix, iy, iz; - int nix, niy, niz; - - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; - } - - /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are - * causing stack overflow issue in this function unless it is inlined. - * - * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization - * enabled. - */ -#ifdef __GNUC__ - ccl_always_inline -#else - ccl_never_inline -#endif - float4 interp_3d_ex_tricubic(float x, float y, float z) - { - int ix, iy, iz; - int nix, niy, niz; - /* Tricubic b-spline interpolation. */ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) -#define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) -#define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); - -#undef COL_TERM -#undef ROW_TERM -#undef DATA - } - - ccl_always_inline float4 interp_3d_ex(float x, float y, float z, - int interpolation = INTERPOLATION_LINEAR) - { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - switch(interpolation) { - case INTERPOLATION_CLOSEST: - return interp_3d_ex_closest(x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_ex_linear(x, y, z); - default: - return interp_3d_ex_tricubic(x, y, z); - } - } - - ccl_always_inline void dimensions_set(int width_, int height_, int depth_) - { - width = width_; - height = height_; - depth = depth_; - } - - T *data; - int interpolation; - ExtensionType extension; - int width, height, depth; -#undef SET_CUBIC_SPLINE_WEIGHTS -}; - -typedef texture texture_float4; -typedef texture texture_float2; -typedef texture texture_float; -typedef texture texture_uint; -typedef texture texture_int; -typedef texture texture_uint4; -typedef texture texture_uchar4; -typedef texture texture_uchar; -typedef texture_image texture_image_float; -typedef texture_image texture_image_uchar; -typedef texture_image texture_image_half; -typedef texture_image texture_image_float4; -typedef texture_image texture_image_uchar4; -typedef texture_image texture_image_half4; - /* Macros to handle different memory storage on different devices */ #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) @@ -563,10 +120,6 @@ typedef texture_image texture_image_half4; #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index)) #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) -#define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y) -#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z) -#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation) - #define kernel_data (kg->__data) #ifdef __KERNEL_SSE2__ -- cgit v1.2.3