diff options
Diffstat (limited to 'intern/cycles/kernel/device/gpu/image.h')
-rw-r--r-- | intern/cycles/kernel/device/gpu/image.h | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h new file mode 100644 index 00000000000..b015c78a8f5 --- /dev/null +++ b/intern/cycles/kernel/device/gpu/image.h @@ -0,0 +1,278 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_NANOVDB +# define NDEBUG /* Disable "assert" in device code */ +# define NANOVDB_USE_INTRINSICS +# include "nanovdb/NanoVDB.h" +# include "nanovdb/util/SampleFromVoxels.h" +#endif + +/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ +ccl_device float cubic_w0(float a) +{ + return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); +} +ccl_device float cubic_w1(float a) +{ + return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); +} +ccl_device float cubic_w2(float a) +{ + return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); +} +ccl_device float cubic_w3(float a) +{ + return (1.0f / 6.0f) * (a * a * a); +} + +/* g0 and g1 are the two amplitude functions. */ +ccl_device float cubic_g0(float a) +{ + return cubic_w0(a) + cubic_w1(a); +} +ccl_device float cubic_g1(float a) +{ + return cubic_w2(a) + cubic_w3(a); +} + +/* h0 and h1 are the two offset functions */ +ccl_device float cubic_h0(float a) +{ + return (cubic_w1(a) / cubic_g0(a)) - 1.0f; +} +ccl_device float cubic_h1(float a) +{ + return (cubic_w3(a) / cubic_g1(a)) + 1.0f; +} + +/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ +template<typename T> +ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y) +{ + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + + float px = floorf(x); + float py = floorf(y); + float fx = x - px; + float fy = y - py; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; + float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; + float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; + float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; + + return cubic_g0(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y0) + + g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y0)) + + cubic_g1(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y1) + + g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y1)); +} + +/* Fast tricubic texture lookup using 8 trilinear lookups. */ +template<typename T> +ccl_device_noinline T +kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z) +{ + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + z = (z * info.depth) - 0.5f; + + float px = floorf(x); + float py = floorf(y); + float pz = floorf(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; + float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; + float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; + float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; + float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth; + float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth; + + return g0z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z0) + + g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z0)) + + g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z0) + + g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z0))) + + g1z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z1) + + g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z1)) + + g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z1) + + g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z1))); +} + +#ifdef WITH_NANOVDB +template<typename T, typename S> +ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z) +{ + float px = floorf(x); + float py = floorf(y); + float pz = floorf(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = px + cubic_h0(fx); + float x1 = px + cubic_h1(fx); + float y0 = py + cubic_h0(fy); + float y1 = py + cubic_h1(fy); + float z0 = pz + cubic_h0(fz); + float z1 = pz + cubic_h1(fz); + + using namespace nanovdb; + + return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + + g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + + g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + + g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); +} + +template<typename T> +ccl_device_noinline T kernel_tex_image_interp_nanovdb( + const TextureInfo &info, float x, float y, float z, uint interpolation) +{ + using namespace nanovdb; + + NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; + typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType; + AccessorType acc = grid->getAccessor(); + + switch (interpolation) { + case INTERPOLATION_CLOSEST: + return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z)); + case INTERPOLATION_LINEAR: + return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); + default: + SampleFromVoxels<AccessorType, 1, false> s(acc); + return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f); + } +} +#endif + +ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y) +{ + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + /* float4, byte4, ushort4 and half4 */ + const int texture_type = info.data_type; + if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { + if (info.interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic<float4>(info, x, y); + } + else { + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + return ccl_gpu_tex_object_read_2D<float4>(tex, x, y); + } + } + /* float, byte and half */ + else { + float f; + + if (info.interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic<float>(info, x, y); + } + else { + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + f = ccl_gpu_tex_object_read_2D<float>(tex, x, y); + } + + return make_float4(f, f, f, 1.0f); + } +} + +ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg, + int id, + float3 P, + InterpolationType interp) +{ + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + if (info.use_transform_3d) { + P = transform_point(&info.transform_3d, P); + } + + const float x = P.x; + const float y = P.y; + const float z = P.z; + + uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp; + const int texture_type = info.data_type; + +#ifdef WITH_NANOVDB + if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { + float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation); + return make_float4(f, f, f, 1.0f); + } + if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { + nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>( + info, x, y, z, interpolation); + return make_float4(f[0], f[1], f[2], 1.0f); + } +#endif + if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { + if (interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_tricubic<float4>(info, x, y, z); + } + else { + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + return ccl_gpu_tex_object_read_3D<float4>(tex, x, y, z); + } + } + else { + float f; + + if (interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_tricubic<float>(info, x, y, z); + } + else { + ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data; + f = ccl_gpu_tex_object_read_3D<float>(tex, x, y, z); + } + + return make_float4(f, f, f, 1.0f); + } +} + +CCL_NAMESPACE_END |