diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-10-06 22:47:41 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-10-07 15:53:14 +0300 |
commit | 23098cda9936d785988b689ee69e58e900f17cb2 (patch) | |
tree | ed49843e81afbe9c38707324f37bf7e14b234a9b /intern/cycles/kernel/kernels | |
parent | d013b56dde47580d1907e3a994bc49cfaaa9f90c (diff) |
Code refactor: make texture code more consistent between devices.
* Use common TextureInfo struct for all devices, except CUDA fermi.
* Move image sampling code to kernels/*/kernel_*_image.h files.
* Use arrays for data textures on Fermi too, so device_vector<Struct> works.
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel.cpp | 122 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 488 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel.cu | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 175 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel.cl | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h | 229 |
6 files changed, 869 insertions, 148 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 998619ac897..7679ab4f111 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -84,130 +84,16 @@ void kernel_tex_copy(KernelGlobals *kg, if(0) { } -#define KERNEL_TEX(type, ttype, tname) \ +#define KERNEL_TEX(type, tname) \ else if(strcmp(name, #tname) == 0) { \ kg->tname.data = (type*)mem; \ kg->tname.width = width; \ } -#define KERNEL_IMAGE_TEX(type, ttype, tname) +#define KERNEL_IMAGE_TEX(type, tname) #include "kernel/kernel_textures.h" - - else if(strstr(name, "__tex_image_float4")) { - texture_image_float4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_float4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_float4_images.size()) { - kg->texture_float4_images.resize(array_index+1); - } - tex = &kg->texture_float4_images[array_index]; - } - - if(tex) { - tex->data = (float4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_float")) { - texture_image_float *tex = NULL; - int id = atoi(name + strlen("__tex_image_float_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_float_images.size()) { - kg->texture_float_images.resize(array_index+1); - } - tex = &kg->texture_float_images[array_index]; - } - - if(tex) { - tex->data = (float*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_byte4")) { - texture_image_uchar4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_byte4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_byte4_images.size()) { - kg->texture_byte4_images.resize(array_index+1); - } - tex = 
&kg->texture_byte4_images[array_index]; - } - - if(tex) { - tex->data = (uchar4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_byte")) { - texture_image_uchar *tex = NULL; - int id = atoi(name + strlen("__tex_image_byte_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_byte_images.size()) { - kg->texture_byte_images.resize(array_index+1); - } - tex = &kg->texture_byte_images[array_index]; - } - - if(tex) { - tex->data = (uchar*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_half4")) { - texture_image_half4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_half4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_half4_images.size()) { - kg->texture_half4_images.resize(array_index+1); - } - tex = &kg->texture_half4_images[array_index]; - } - - if(tex) { - tex->data = (half4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_half")) { - texture_image_half *tex = NULL; - int id = atoi(name + strlen("__tex_image_half_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_half_images.size()) { - kg->texture_half_images.resize(array_index+1); - } - tex = &kg->texture_half_images[array_index]; - } - - if(tex) { - tex->data = (half*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else + else { assert(0); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index f6bb4c25012..b2ad60f08c1 100644 --- 
a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -17,70 +17,500 @@ #ifndef __KERNEL_CPU_IMAGE_H__ #define __KERNEL_CPU_IMAGE_H__ -#ifdef __KERNEL_CPU__ - CCL_NAMESPACE_BEGIN -ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y) +template<typename T> struct TextureInterpolator { +#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ + u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ + u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } (void)0 + + static ccl_always_inline float4 read(float4 r) + { + return r; + } + + static ccl_always_inline float4 read(uchar4 r) + { + float f = 1.0f/255.0f; + return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); + } + + static ccl_always_inline float4 read(uchar r) + { + float f = r*(1.0f/255.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(float r) + { + /* TODO(dingto): Optimize this, so interpolation + * happens on float instead of float4 */ + return make_float4(r, r, r, 1.0f); + } + + static ccl_always_inline float4 read(half4 r) + { + return half4_to_float4(r); + } + + static ccl_always_inline float4 read(half r) + { + float f = half_to_float(r); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline int wrap_periodic(int x, int width) + { + x %= width; + if(x < 0) + x += width; + return x; + } + + static ccl_always_inline int wrap_clamp(int x, int width) + { + return clamp(x, 0, width-1); + } + + static ccl_always_inline float frac(float x, int *ix) + { + int i = float_to_int(x) - ((x < 0.0f)? 
1: 0); + *ix = i; + return x - (float)i; + } + + static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y) + { + if(UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + const T *data = (const T*)info.data; + int width = info.width; + int height = info.height; + int ix, iy, nix, niy; + + if(info.interpolation == INTERPOLATION_CLOSEST) { + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[ix + iy*width]); + } + else if(info.interpolation == INTERPOLATION_LINEAR) { + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); + r += (1.0f - ty)*tx*read(data[nix + iy*width]); + r += ty*(1.0f - tx)*read(data[ix + niy*width]); + r += ty*tx*read(data[nix + niy*width]); + + return r; + } + else { + /* Bicubic b-spline interpolation. 
*/ + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + int pix, piy, nnix, nniy; + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y) (read(data[xc[x] + yc[y]])) +#define TERM(col) \ + (v[col] * (u[0] * DATA(0, col) + \ + u[1] * DATA(1, col) + \ + u[2] * DATA(2, col) + \ + u[3] * DATA(3, col))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + /* Actual interpolation. 
*/ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); + +#undef TERM +#undef DATA + } + } + + static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + frac(z*(float)depth, &iz); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T*)info.data; + return read(data[ix + iy*width + iz*width*height]); + } + + static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + float tz = frac(z*(float)depth - 0.5f, &iz); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + ix = wrap_clamp(ix, 
width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T*)info.data; + float4 r; + + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); + r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); + r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); + r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); + + r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); + r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); + r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); + r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); + + return r; + } + + /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are + * causing stack overflow issue in this function unless it is inlined. + * + * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization + * enabled. + */ +#ifdef __GNUC__ + static ccl_always_inline +#else + static ccl_never_inline +#endif + float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. 
*/ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. 
+ */ +#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) +#define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +#define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + const T *data = (const T*)info.data; + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +#undef COL_TERM +#undef ROW_TERM +#undef DATA + } + + static ccl_always_inline float4 interp_3d(const TextureInfo& info, + float x, float y, float z, + int interpolation = INTERPOLATION_LINEAR) + { + if(UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch(interpolation) { + case INTERPOLATION_CLOSEST: + return interp_3d_closest(info, x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_linear(info, x, y, z); + default: + return interp_3d_tricubic(info, x, y, z); + } + } +#undef SET_CUBIC_SPLINE_WEIGHTS +}; + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - switch(kernel_tex_type(tex)) { + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + + switch(kernel_tex_type(id)) { case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<half>::interp(info, x, y); case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<uchar>::interp(info, x, y); case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<float>::interp(info, x, y); case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<half4>::interp(info, x, y); case 
IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<uchar4>::interp(info, x, y); case IMAGE_DATA_TYPE_FLOAT4: default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator<float4>::interp(info, x, y); } } -ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) { - switch(kernel_tex_type(tex)) { + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + InterpolationType interp = (InterpolationType)info.interpolation; + + switch(kernel_tex_type(id)) { case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT4: default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); } } -ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) +ccl_device float4 kernel_tex_image_interp_3d_ex(KernelGlobals *kg, 
int id, float x, float y, float z, int interp) { - switch(kernel_tex_type(tex)) { + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + + switch(kernel_tex_type(id)) { case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT4: default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); } } CCL_NAMESPACE_END -#endif // __KERNEL_CPU__ - - #endif // __KERNEL_CPU_IMAGE_H__ diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 1ac6afd167a..3c93e00ccf1 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -26,6 +26,7 @@ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" #include "kernel/kernel_globals.h" +#include "kernel/kernels/cuda/kernel_cuda_image.h" #include "kernel/kernel_film.h" #include "kernel/kernel_path.h" #include "kernel/kernel_path_branched.h" diff --git 
a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h new file mode 100644 index 00000000000..00f6954003d --- /dev/null +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -0,0 +1,175 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if __CUDA_ARCH__ >= 300 + +/* Kepler */ + +ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) +{ + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + + /* float4, byte4 and half4 */ + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || + texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4) + { + return tex2D<float4>(tex, x, y); + } + /* float, byte and half */ + else { + float f = tex2D<float>(tex, x, y); + return make_float4(f, f, f, 1.0f); + } +} + +ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z) +{ + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || + texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4) + { + return tex3D<float4>(tex, x, y, z); + } + else { + float f = tex3D<float>(tex, x, y, z); + return make_float4(f, f, f, 
1.0f); + } +} + +#else + +/* Fermi */ + +ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) +{ + float4 r; + switch(id) { + case 0: r = tex2D(__tex_image_float4_000, x, y); break; + case 8: r = tex2D(__tex_image_float4_008, x, y); break; + case 16: r = tex2D(__tex_image_float4_016, x, y); break; + case 24: r = tex2D(__tex_image_float4_024, x, y); break; + case 32: r = tex2D(__tex_image_float4_032, x, y); break; + case 1: r = tex2D(__tex_image_byte4_001, x, y); break; + case 9: r = tex2D(__tex_image_byte4_009, x, y); break; + case 17: r = tex2D(__tex_image_byte4_017, x, y); break; + case 25: r = tex2D(__tex_image_byte4_025, x, y); break; + case 33: r = tex2D(__tex_image_byte4_033, x, y); break; + case 41: r = tex2D(__tex_image_byte4_041, x, y); break; + case 49: r = tex2D(__tex_image_byte4_049, x, y); break; + case 57: r = tex2D(__tex_image_byte4_057, x, y); break; + case 65: r = tex2D(__tex_image_byte4_065, x, y); break; + case 73: r = tex2D(__tex_image_byte4_073, x, y); break; + case 81: r = tex2D(__tex_image_byte4_081, x, y); break; + case 89: r = tex2D(__tex_image_byte4_089, x, y); break; + case 97: r = tex2D(__tex_image_byte4_097, x, y); break; + case 105: r = tex2D(__tex_image_byte4_105, x, y); break; + case 113: r = tex2D(__tex_image_byte4_113, x, y); break; + case 121: r = tex2D(__tex_image_byte4_121, x, y); break; + case 129: r = tex2D(__tex_image_byte4_129, x, y); break; + case 137: r = tex2D(__tex_image_byte4_137, x, y); break; + case 145: r = tex2D(__tex_image_byte4_145, x, y); break; + case 153: r = tex2D(__tex_image_byte4_153, x, y); break; + case 161: r = tex2D(__tex_image_byte4_161, x, y); break; + case 169: r = tex2D(__tex_image_byte4_169, x, y); break; + case 177: r = tex2D(__tex_image_byte4_177, x, y); break; + case 185: r = tex2D(__tex_image_byte4_185, x, y); break; + case 193: r = tex2D(__tex_image_byte4_193, x, y); break; + case 201: r = tex2D(__tex_image_byte4_201, x, y); break; + case 209: r = 
tex2D(__tex_image_byte4_209, x, y); break; + case 217: r = tex2D(__tex_image_byte4_217, x, y); break; + case 225: r = tex2D(__tex_image_byte4_225, x, y); break; + case 233: r = tex2D(__tex_image_byte4_233, x, y); break; + case 241: r = tex2D(__tex_image_byte4_241, x, y); break; + case 249: r = tex2D(__tex_image_byte4_249, x, y); break; + case 257: r = tex2D(__tex_image_byte4_257, x, y); break; + case 265: r = tex2D(__tex_image_byte4_265, x, y); break; + case 273: r = tex2D(__tex_image_byte4_273, x, y); break; + case 281: r = tex2D(__tex_image_byte4_281, x, y); break; + case 289: r = tex2D(__tex_image_byte4_289, x, y); break; + case 297: r = tex2D(__tex_image_byte4_297, x, y); break; + case 305: r = tex2D(__tex_image_byte4_305, x, y); break; + case 313: r = tex2D(__tex_image_byte4_313, x, y); break; + case 321: r = tex2D(__tex_image_byte4_321, x, y); break; + case 329: r = tex2D(__tex_image_byte4_329, x, y); break; + case 337: r = tex2D(__tex_image_byte4_337, x, y); break; + case 345: r = tex2D(__tex_image_byte4_345, x, y); break; + case 353: r = tex2D(__tex_image_byte4_353, x, y); break; + case 361: r = tex2D(__tex_image_byte4_361, x, y); break; + case 369: r = tex2D(__tex_image_byte4_369, x, y); break; + case 377: r = tex2D(__tex_image_byte4_377, x, y); break; + case 385: r = tex2D(__tex_image_byte4_385, x, y); break; + case 393: r = tex2D(__tex_image_byte4_393, x, y); break; + case 401: r = tex2D(__tex_image_byte4_401, x, y); break; + case 409: r = tex2D(__tex_image_byte4_409, x, y); break; + case 417: r = tex2D(__tex_image_byte4_417, x, y); break; + case 425: r = tex2D(__tex_image_byte4_425, x, y); break; + case 433: r = tex2D(__tex_image_byte4_433, x, y); break; + case 441: r = tex2D(__tex_image_byte4_441, x, y); break; + case 449: r = tex2D(__tex_image_byte4_449, x, y); break; + case 457: r = tex2D(__tex_image_byte4_457, x, y); break; + case 465: r = tex2D(__tex_image_byte4_465, x, y); break; + case 473: r = tex2D(__tex_image_byte4_473, x, y); break; + case 
481: r = tex2D(__tex_image_byte4_481, x, y); break; + case 489: r = tex2D(__tex_image_byte4_489, x, y); break; + case 497: r = tex2D(__tex_image_byte4_497, x, y); break; + case 505: r = tex2D(__tex_image_byte4_505, x, y); break; + case 513: r = tex2D(__tex_image_byte4_513, x, y); break; + case 521: r = tex2D(__tex_image_byte4_521, x, y); break; + case 529: r = tex2D(__tex_image_byte4_529, x, y); break; + case 537: r = tex2D(__tex_image_byte4_537, x, y); break; + case 545: r = tex2D(__tex_image_byte4_545, x, y); break; + case 553: r = tex2D(__tex_image_byte4_553, x, y); break; + case 561: r = tex2D(__tex_image_byte4_561, x, y); break; + case 569: r = tex2D(__tex_image_byte4_569, x, y); break; + case 577: r = tex2D(__tex_image_byte4_577, x, y); break; + case 585: r = tex2D(__tex_image_byte4_585, x, y); break; + case 593: r = tex2D(__tex_image_byte4_593, x, y); break; + case 601: r = tex2D(__tex_image_byte4_601, x, y); break; + case 609: r = tex2D(__tex_image_byte4_609, x, y); break; + case 617: r = tex2D(__tex_image_byte4_617, x, y); break; + case 625: r = tex2D(__tex_image_byte4_625, x, y); break; + case 633: r = tex2D(__tex_image_byte4_633, x, y); break; + case 641: r = tex2D(__tex_image_byte4_641, x, y); break; + case 649: r = tex2D(__tex_image_byte4_649, x, y); break; + case 657: r = tex2D(__tex_image_byte4_657, x, y); break; + case 665: r = tex2D(__tex_image_byte4_665, x, y); break; + default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return r; +} + +ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z) +{ + float4 r; + switch(id) { + case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break; + case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break; + case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break; + case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break; + case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break; + } + return r; +} + +#endif + diff --git 
a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 66b6e19de84..9d5d784e140 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -20,7 +20,7 @@ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" #include "kernel/kernel_globals.h" -#include "kernel/kernel_image_opencl.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" #include "kernel/kernel_film.h" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h new file mode 100644 index 00000000000..514980e731e --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -0,0 +1,229 @@ +/* + * Copyright 2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* For OpenCL we do manual lookup and interpolation. 
*/
+/* Return the TextureInfo entry for image `id`.
+ *
+ * The table index is `id` plus one for every KERNEL_TEX() entry that
+ * kernel_textures.h expands: the macro is defined as "+ 1" right before the
+ * include, so the include turns into a sum. The table is read from
+ * kg->buffers[0].
+ * NOTE(review): KERNEL_TEX is not undefined here; presumably
+ * kernel_textures.h does that itself -- confirm.
+ */
+ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
+	const uint tex_offset = id
+#define KERNEL_TEX(type, name) + 1
+#include "kernel/kernel_textures.h"
+	;
+
+	return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
+}
+/* Read element `index`, viewed as `type`, from the image described by `info`.
+ * The address is kg->buffers[info->cl_buffer] advanced by info->data.
+ * Relies on a variable named `kg` being in scope at the call site. `type` and
+ * `info` are substituted without parentheses, so pass plain identifiers.
+ * NOTE(review): the sum is formed before the cast, so info->data is
+ * presumably a byte offset -- confirm against the type of kg->buffers.
+ */
+#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
+/* Fetch the texel at linear index `offset` of image `id` as a float4.
+ *
+ * The storage type comes from kernel_tex_type(id):
+ * - FLOAT4: returned unchanged.
+ * - BYTE4: each channel multiplied by 1/255.
+ * - FLOAT: the value is copied to x, y and z, w is 1.
+ * - anything else: read as one uchar, multiplied by 1/255, copied to
+ *   x, y and z, w is 1.
+ */
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+	const int texture_type = kernel_tex_type(id);
+
+	/* Float4 */
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
+		return tex_fetch(float4, info, offset);
+	}
+	/* Byte4 */
+	else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
+		uchar4 r = tex_fetch(uchar4, info, offset);
+		float f = 1.0f/255.0f;
+		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	}
+	/* Float */
+	else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
+		float f = tex_fetch(float, info, offset);
+		return make_float4(f, f, f, 1.0f);
+	}
+	/* Byte */
+	else {
+		uchar r = tex_fetch(uchar, info, offset);
+		float f = r * (1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+/* Wrap texel index `x` for a repeating image of size `width`. The remainder
+ * can be negative for negative x, in which case `width` is added so the
+ * result lands in 0..width-1 (for positive width).
+ */
+ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
+{
+	x %= width;
+	if(x < 0)
+		x += width;
+	return x;
+}
+/* Clamp texel index `x` to the range 0..width-1. */
+ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
+{
+	return clamp(x, 0, width-1);
+}
+/* Split `x` into an integer part, stored in *ix, and the remainder x - *ix,
+ * which is returned. For negative x one is subtracted from float_to_int(x).
+ * NOTE(review): that gives floor() when float_to_int truncates toward zero,
+ * except for exactly integral negative x, where *ix is x - 1 and the
+ * returned remainder is 1 -- confirm float_to_int semantics.
+ */
+ccl_device_inline float svm_image_texture_frac(float x, int *ix)
+{
+	int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
+	*ix = i;
+	return x - (float)i;
+}
+/* Sample 2D image `id` at (x, y) and return the filtered float4 value.
+ *
+ * x and y are scaled by info->width and info->height to get texel
+ * coordinates, so 0..1 covers the image once.
+ * info->interpolation: INTERPOLATION_CLOSEST reads a single texel; any other
+ * value blends the four surrounding texels.
+ * info->extension: EXTENSION_REPEAT wraps indices, EXTENSION_CLIP returns
+ * zero when x or y lies outside 0..1 and otherwise clamps like the remaining
+ * case, which clamps indices to the image edge.
+ */
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+	uint width = info->width;
+	uint height = info->height;
+	uint offset = 0; /* Added to every texel index below; always zero in this function. */
+	uint interpolation = info->interpolation;
+	uint extension = info->extension;
+
+	/* Actual sampling. */
+	float4 r;
+	int ix, iy, nix, niy;
+	if(interpolation == INTERPOLATION_CLOSEST) {
+		svm_image_texture_frac(x*width, &ix); /* Only the integer part is used; the returned fraction is dropped. */
+		svm_image_texture_frac(y*height, &iy);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { /* Tested on the input coordinates, not on texel indices. */
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/* EXTENSION_EXTEND */
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+		}
+
+		r = svm_image_texture_read(kg, id, offset + ix + iy*width); /* Row-major index: one row is `width` texels. */
+	}
+	else { /* INTERPOLATION_LINEAR */
+		float tx = svm_image_texture_frac(x*width - 0.5f, &ix); /* Shifted by 0.5 texel: ix is the lower neighbour, tx the weight of ix+1. */
+		float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+
+			nix = svm_image_texture_wrap_periodic(ix+1, width);
+			niy = svm_image_texture_wrap_periodic(iy+1, height);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			nix = svm_image_texture_wrap_clamp(ix+1, width); /* Uses the unclamped ix, so it must run before ix is clamped below. */
+			niy = svm_image_texture_wrap_clamp(iy+1, height);
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+		}
+		/* Sum of the four neighbouring texels, each weighted by the product of its per-axis weights. */
+		r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
+		r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
+		r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
+		r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
+	}
+	return r;
+}
+
+/* Sample 3D image `id` at (x, y, z) and return the filtered float4 value.
+ *
+ * Same scheme as the 2D lookup with a third axis: coordinates are scaled by
+ * info->width, info->height and info->depth, INTERPOLATION_CLOSEST reads one
+ * texel and any other interpolation value blends the eight surrounding
+ * texels. EXTENSION_REPEAT wraps indices, EXTENSION_CLIP returns zero when
+ * any coordinate lies outside 0..1, and otherwise indices are clamped.
+ * Texels are addressed as ix + iy*width + iz*width*height.
+ */
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+	uint width = info->width;
+	uint height = info->height;
+	uint offset = 0; /* Added to every texel index below; always zero in this function. */
+	uint depth = info->depth;
+	uint interpolation = info->interpolation;
+	uint extension = info->extension;
+
+	/* Actual sampling. */
+	float4 r;
+	int ix, iy, iz, nix, niy, niz;
+	if(interpolation == INTERPOLATION_CLOSEST) {
+		svm_image_texture_frac(x*width, &ix); /* Only the integer part is used; the returned fraction is dropped. */
+		svm_image_texture_frac(y*height, &iy);
+		svm_image_texture_frac(z*depth, &iz);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+			iz = svm_image_texture_wrap_periodic(iz, depth);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/* EXTENSION_EXTEND */
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+			iz = svm_image_texture_wrap_clamp(iz, depth);
+		}
+		r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
+	}
+	else { /* INTERPOLATION_LINEAR */
+		float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix); /* Shifted by 0.5 texel: ix is the lower neighbour, tx the weight of ix+1. */
+		float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
+		float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+			iz = svm_image_texture_wrap_periodic(iz, depth);
+
+			nix = svm_image_texture_wrap_periodic(ix+1, width);
+			niy = svm_image_texture_wrap_periodic(iy+1, height);
+			niz = svm_image_texture_wrap_periodic(iz+1, depth);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/* EXTENSION_EXTEND */
+			nix = svm_image_texture_wrap_clamp(ix+1, width); /* Uses the unclamped ix, so it must run before ix is clamped below. */
+			niy = svm_image_texture_wrap_clamp(iy+1, height);
+			niz = svm_image_texture_wrap_clamp(iz+1, depth);
+
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+			iz = svm_image_texture_wrap_clamp(iz, depth);
+		}
+		/* Sum of the eight neighbouring texels: first the four in slice iz, then the four in slice niz. */
+		r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
+		r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height);
+		r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height);
+		r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height);
+
+		r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height);
+		r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
+		r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
+		r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
+	}
+	return r;
+} |