diff options
Diffstat (limited to 'intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h')
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 151 |
1 files changed, 143 insertions, 8 deletions
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 00f6954003d..269e74f6164 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -18,7 +18,115 @@
 
 /* Kepler */
 
-ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
+/* w0, w1, w2, and w3 are the four cubic B-spline basis functions, evaluated at the fractional texel offset a; for any a the four weights sum to 1. */
+ccl_device float cubic_w0(float a)
+{
+	return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
+}
+
+ccl_device float cubic_w1(float a)
+{
+	return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
+}
+
+ccl_device float cubic_w2(float a)
+{
+	return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
+}
+
+ccl_device float cubic_w3(float a)
+{
+	return (1.0f/6.0f)*(a*a*a);
+}
+
+/* g0 and g1 are the two amplitude functions: the combined weight of the (w0, w1) pair and of the (w2, w3) pair, so g0 + g1 = 1. */
+ccl_device float cubic_g0(float a)
+{
+	return cubic_w0(a) + cubic_w1(a);
+}
+
+ccl_device float cubic_g1(float a)
+{
+	return cubic_w2(a) + cubic_w3(a);
+}
+
+/* h0 and h1 are the two offset functions: the position, relative to the floored texel coordinate, at which one linearly filtered fetch blends a weight pair in the ratio of its two weights. */
+ccl_device float cubic_h0(float a)
+{
+	/* Note +0.5 offset to compensate for CUDA linear filtering convention (a filtered fetch subtracts 0.5 from the texel coordinate before interpolating). */
+	return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+}
+
+ccl_device float cubic_h1(float a)
+{
+	return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; /* Same +0.5 compensation as in cubic_h0. */
+}
+
+/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. Relies on tex being sampled with linear filtering; x and y use the same coordinate convention as a direct tex2D lookup on tex. */
+template<typename T>
+ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
+{
+	x = (x * info.width) - 0.5f; /* Scale to texels and shift by half a texel. */
+	y = (y * info.height) - 0.5f;
+
+	float px = floor(x);
+	float py = floor(y);
+	float fx = x - px; /* Fractional part, fed to the weight and offset functions. */
+	float fy = y - py;
+
+	float g0x = cubic_g0(fx);
+	float g1x = cubic_g1(fx);
+	float x0 = (px + cubic_h0(fx)) / info.width; /* Back to the coordinate units tex2D is given. */
+	float x1 = (px + cubic_h1(fx)) / info.width;
+	float y0 = (py + cubic_h0(fy)) / info.height;
+	float y1 = (py + cubic_h1(fy)) / info.height;
+
+	return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
+	                       g1x * tex2D<T>(tex, x1, y0)) +
+	       cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
+	                       g1x * tex2D<T>(tex, x1, y1));
+}
+
+/* Fast tricubic texture lookup using 8 filtered tex3D lookups; the 3D counterpart of kernel_tex_image_interp_bicubic (despite the "bicubic" in its name), with info.depth scaling the z axis. */
+template<typename T>
+ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
+{
+	x = (x * info.width) - 0.5f; /* Scale to texels and shift by half a texel. */
+	y = (y * info.height) - 0.5f;
+	z = (z * info.depth) - 0.5f;
+
+	float px = floor(x);
+	float py = floor(y);
+	float pz = floor(z);
+	float fx = x - px; /* Fractional part, fed to the weight and offset functions. */
+	float fy = y - py;
+	float fz = z - pz;
+
+	float g0x = cubic_g0(fx);
+	float g1x = cubic_g1(fx);
+	float g0y = cubic_g0(fy);
+	float g1y = cubic_g1(fy);
+	float g0z = cubic_g0(fz);
+	float g1z = cubic_g1(fz);
+
+	float x0 = (px + cubic_h0(fx)) / info.width; /* Back to the coordinate units tex3D is given. */
+	float x1 = (px + cubic_h1(fx)) / info.width;
+	float y0 = (py + cubic_h0(fy)) / info.height;
+	float y1 = (py + cubic_h1(fy)) / info.height;
+	float z0 = (pz + cubic_h0(fz)) / info.depth;
+	float z1 = (pz + cubic_h1(fz)) / info.depth;
+
+	return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
+	                     g1x * tex3D<T>(tex, x1, y0, z0)) +
+	              g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
+	                     g1x * tex3D<T>(tex, x1, y1, z0))) +
+	       g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
+	                     g1x * tex3D<T>(tex, x1, y0, z1)) +
+	              g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
+	                     g1x * tex3D<T>(tex, x1, y1, z1)));
+}
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
 	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
 	CUtexObject tex = (CUtexObject)info.data;
@@ -29,29 +137,56 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
 	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
 	   texture_type == IMAGE_DATA_TYPE_HALF4)
 	{
-		return tex2D<float4>(tex, x, y);
+		if(info.interpolation == INTERPOLATION_CUBIC) {
+			return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
+		}
+		else {
+			return tex2D<float4>(tex, x, y);
+		}
 	}
 	/* float, byte and half */
 	else {
-		float f = tex2D<float>(tex, x, y);
+		float f;
+
+		if(info.interpolation == INTERPOLATION_CUBIC) {
+			f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
+		}
+		else {
+			f = tex2D<float>(tex, x, y);
+		}
+
 		return make_float4(f, f, f, 1.0f);
 	}
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
 {
 	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
 	CUtexObject tex = (CUtexObject)info.data;
+	uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp; /* INTERPOLATION_NONE selects the texture's own setting; any other value overrides it. */
 
 	const int texture_type = kernel_tex_type(id);
 	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
 	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
 	   texture_type == IMAGE_DATA_TYPE_HALF4)
 	{
-		return tex3D<float4>(tex, x, y, z);
+		if(interpolation == INTERPOLATION_CUBIC) {
+			return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
+		}
+		else {
+			return tex3D<float4>(tex, x, y, z);
+		}
 	}
 	else {
-		float f = tex3D<float>(tex, x, y, z);
+		float f;
+
+		if(interpolation == INTERPOLATION_CUBIC) {
+			f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
+		}
+		else {
+			f = tex3D<float>(tex, x, y, z);
+		}
+
 		return make_float4(f, f, f, 1.0f);
 	}
 }
@@ -60,7 +195,7 @@ ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y,
 
 /* Fermi */
 
-ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
 	float4 r;
 	switch(id) {
@@ -158,7 +293,7 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
 	return r;
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) /* NOTE(review): the Kepler variant of this function also takes an InterpolationType interp argument; this one does not - confirm callers compile against both signatures. */
 {
 	float4 r;
 	switch(id) {
|