diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/kernel_compat_opencl.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 106 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 86 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h | 235 | ||||
-rw-r--r-- | intern/cycles/render/image_vdb.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 2 |
6 files changed, 266 insertions, 168 deletions
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index ba7ab43a47a..1848f6059b6 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -48,7 +48,7 @@ #define ccl_align(n) __attribute__((aligned(n))) #define ccl_optional_struct_init -#if __OPENCL_VERSION__ >= 200 +#if __OPENCL_VERSION__ >= 200 && !defined(__NV_CL_C_VERSION) # define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1))) #else # define ccl_loop_no_unroll diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index b466b41f456..b97400a443a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -28,7 +28,6 @@ CCL_NAMESPACE_BEGIN * instruction sets. */ namespace { -template<typename T> struct TextureInterpolator { #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ { \ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ @@ -38,6 +37,15 @@ template<typename T> struct TextureInterpolator { } \ (void)0 +ccl_always_inline float frac(float x, int *ix) +{ + int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); + *ix = i; + return x - (float)i; +} + +template<typename T> struct TextureInterpolator { + static ccl_always_inline float4 read(float4 r) { return r; @@ -106,13 +114,6 @@ template<typename T> struct TextureInterpolator { return clamp(x, 0, width - 1); } - static ccl_always_inline float frac(float x, int *ix) - { - int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); - *ix = i; - return x - (float)i; - } - /* ******** 2D interpolation ******** */ static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y) @@ -370,7 +371,7 @@ template<typename T> struct TextureInterpolator { static ccl_never_inline #endif float4 - interp_3d_tricubic(const TextureInfo &info, float x, float y, float z) + interp_3d_cubic(const TextureInfo &info, float x, float y, float z) { int width = info.width; int height = info.height; @@ -469,14 +470,16 @@ template<typename T> struct TextureInterpolator { case INTERPOLATION_LINEAR: return interp_3d_linear(info, x, y, z); default: - return interp_3d_tricubic(info, x, y, z); + return interp_3d_cubic(info, x, y, z); } } -#undef SET_CUBIC_SPLINE_WEIGHTS }; #ifdef WITH_NANOVDB template<typename T> struct NanoVDBInterpolator { + + typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT; + static ccl_always_inline float4 read(float r) { return make_float4(r, r, r, 1.0f); @@ -487,26 +490,93 @@ template<typename T> struct NanoVDBInterpolator { return make_float4(r[0], r[1], r[2], 1.0f); } + static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float x, float y, float z) + { + const nanovdb::Vec3f xyz(x, y, z); + return read(nanovdb::NearestNeighborSampler<ReadAccessorT, false>(acc)(xyz)); + } + + static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, float y, float z) + { + const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f); + return read(nanovdb::TrilinearSampler<ReadAccessorT, false>(acc)(xyz)); + } + +# if defined(__GNUC__) || defined(__clang__) + static ccl_always_inline +# else + static ccl_never_inline +# endif + float4 + interp_3d_cubic(ReadAccessorT acc, float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + int pix, piy, piz; + int nnix, nniy, nniz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x - 0.5f, &ix); + const float ty = frac(y - 0.5f, &iy); + const float tz = frac(z - 0.5f, &iz); + pix = ix - 1; + piy = iy - 1; + piz = iz - 1; + nix = ix + 1; + niy = iy + 1; + niz = iz + 1; + nnix = ix + 2; + nniy = iy + 2; + nniz = iz + 2; + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {piy, iy, niy, nniy}; + const int zc[4] = {piz, iz, niz, nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z])))) +# define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +# define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +# undef COL_TERM +# undef ROW_TERM +# undef DATA + } + static ccl_always_inline float4 interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp) { - const nanovdb::Vec3f xyz(x, y, z); - nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data; - const nanovdb::NanoRoot<T> &root = grid->tree().root(); + using namespace nanovdb; + + NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; + const NanoRoot<T> &root = grid->tree().root(); - typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT; switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { case INTERPOLATION_CLOSEST: - return read(nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz)); + return interp_3d_closest(root, x, y, z); case INTERPOLATION_LINEAR: - return read(nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz)); + return interp_3d_linear(root, x, y, z); default: - return read(nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz)); + return interp_3d_cubic(root, x, y, z); } } }; #endif +#undef SET_CUBIC_SPLINE_WEIGHTS + ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index c2a0ee06dbc..b8aaacba960 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -24,17 +24,14 @@ ccl_device float cubic_w0(float a) { return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); } - ccl_device float cubic_w1(float a) { return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); } - ccl_device float cubic_w2(float a) { return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); } - ccl_device float cubic_w3(float a) { return (1.0f / 6.0f) * (a * a * a); @@ -45,7 +42,6 @@ ccl_device float cubic_g0(float a) { return cubic_w0(a) + cubic_w1(a); } - ccl_device float cubic_g1(float a) { return cubic_w2(a) + cubic_w3(a); @@ -54,13 +50,11 @@ ccl_device float cubic_g1(float a) /* h0 and h1 are the two offset functions */ ccl_device float cubic_h0(float a) { - /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ - return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; + return (cubic_w1(a) / cubic_g0(a)) - 1.0f; } - ccl_device float cubic_h1(float a) { - return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; + return (cubic_w3(a) / cubic_g1(a)) + 1.0f; } /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ @@ -79,10 +73,11 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f float g0x = cubic_g0(fx); float g1x = cubic_g1(fx); - float x0 = (px + cubic_h0(fx)) / info.width; - float x1 = (px + cubic_h1(fx)) / info.width; - float y0 = (py + cubic_h0(fy)) / info.height; - float y1 = (py + cubic_h1(fy)) / info.height; + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; + float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; + float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; + float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) + cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1)); @@ -90,7 +85,7 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f /* Fast tricubic texture lookup using 8 trilinear lookups. */ template<typename T> -ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x, float y, float z) +ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z) { CUtexObject tex = (CUtexObject)info.data; @@ -112,12 +107,13 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x float g0z = cubic_g0(fz); float g1z = cubic_g1(fz); - float x0 = (px + cubic_h0(fx)) / info.width; - float x1 = (px + cubic_h1(fx)) / info.width; - float y0 = (py + cubic_h0(fy)) / info.height; - float y1 = (py + cubic_h1(fy)) / info.height; - float z0 = (pz + cubic_h0(fz)) / info.depth; - float z1 = (pz + cubic_h1(fz)) / info.depth; + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; + float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; + float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; + float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; + float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth; + float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth; return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) + g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) + @@ -126,22 +122,56 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x } #ifdef WITH_NANOVDB +template<typename T, typename S> +ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z) +{ + float px = floor(x); + float py = floor(y); + float pz = floor(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = px + cubic_h0(fx); + float x1 = px + cubic_h1(fx); + float y0 = py + cubic_h0(fy); + float y1 = py + cubic_h1(fy); + float z0 = pz + cubic_h0(fz); + float z1 = pz + cubic_h1(fz); + + using namespace nanovdb; + + return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + + g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + + g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + + g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); +} + template<typename T> ccl_device_inline T kernel_tex_image_interp_nanovdb( const TextureInfo &info, float x, float y, float z, uint interpolation) { - const nanovdb::Vec3f xyz(x, y, z); - nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data; - const nanovdb::NanoRoot<T> &root = grid->tree().root(); + using namespace nanovdb; + typedef ReadAccessor<NanoRoot<T>> ReadAccessorT; + + NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; + const NanoRoot<T> &root = grid->tree().root(); - typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT; switch (interpolation) { case INTERPOLATION_CLOSEST: - return nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz); + return NearestNeighborSampler<ReadAccessorT, false>(root)(Vec3f(x, y, z)); case INTERPOLATION_LINEAR: - return nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz); + return TrilinearSampler<ReadAccessorT, false>(root)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); default: - return nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz); + TrilinearSampler<ReadAccessorT, false> s(root); + return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f); } } #endif @@ -210,7 +240,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { if (interpolation == INTERPOLATION_CUBIC) { - return kernel_tex_image_interp_bicubic_3d<float4>(info, x, y, z); + return kernel_tex_image_interp_tricubic<float4>(info, x, y, z); } else { CUtexObject tex = (CUtexObject)info.data; @@ -221,7 +251,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, float f; if (interpolation == INTERPOLATION_CUBIC) { - f = kernel_tex_image_interp_bicubic_3d<float>(info, x, y, z); + f = kernel_tex_image_interp_tricubic<float>(info, x, y, z); } else { CUtexObject tex = (CUtexObject)info.data; diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index cbf9a208112..f39998299ef 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -16,7 +16,6 @@ #ifdef WITH_NANOVDB # include "nanovdb/CNanoVDB.h" -# include "nanovdb/util/CSampleFromVoxels.h" #endif /* For OpenCL we do manual lookup and interpolation. */ @@ -47,95 +46,128 @@ ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) return clamp(x, 0, width - 1); } -ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, - const ccl_global TextureInfo *info, - int id, - int offset) +ccl_device_inline float4 svm_image_texture_read( + KernelGlobals *kg, const ccl_global TextureInfo *info, void *acc, int x, int y, int z) { + const int data_offset = x + info->width * y + info->width * info->height * z; const int texture_type = info->data_type; /* Float4 */ if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { - return tex_fetch(float4, info, offset); + return tex_fetch(float4, info, data_offset); } /* Byte4 */ else if (texture_type == IMAGE_DATA_TYPE_BYTE4) { - uchar4 r = tex_fetch(uchar4, info, offset); + uchar4 r = tex_fetch(uchar4, info, data_offset); float f = 1.0f / 255.0f; return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); } /* Ushort4 */ else if (texture_type == IMAGE_DATA_TYPE_USHORT4) { - ushort4 r = tex_fetch(ushort4, info, offset); + ushort4 r = tex_fetch(ushort4, info, data_offset); float f = 1.0f / 65535.f; return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); } /* Float */ else if (texture_type == IMAGE_DATA_TYPE_FLOAT) { - float f = tex_fetch(float, info, offset); + float f = tex_fetch(float, info, data_offset); return make_float4(f, f, f, 1.0f); } /* UShort */ else if (texture_type == IMAGE_DATA_TYPE_USHORT) { - ushort r = tex_fetch(ushort, info, offset); + ushort r = tex_fetch(ushort, info, data_offset); float f = r * (1.0f / 65535.0f); return make_float4(f, f, f, 1.0f); } - /* Byte */ +#ifdef WITH_NANOVDB + /* NanoVDB Float */ + else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { + cnanovdb_coord coord; + coord.mVec[0] = x; + coord.mVec[1] = y; + coord.mVec[2] = z; + float f = cnanovdb_readaccessor_getValueF((cnanovdb_readaccessor *)acc, &coord); + return make_float4(f, f, f, 1.0f); + } + /* NanoVDB Float3 */ + else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { + cnanovdb_coord coord; + coord.mVec[0] = x; + coord.mVec[1] = y; + coord.mVec[2] = z; + cnanovdb_Vec3F f = cnanovdb_readaccessor_getValueF3((cnanovdb_readaccessor *)acc, &coord); + return make_float4(f.mVec[0], f.mVec[1], f.mVec[2], 1.0f); + } +#endif #ifdef __KERNEL_CL_KHR_FP16__ - /* half and half4 are optional in OpenCL */ + /* Half and Half4 are optional in OpenCL */ else if (texture_type == IMAGE_DATA_TYPE_HALF) { - float f = tex_fetch(half, info, offset); + float f = tex_fetch(half, info, data_offset); return make_float4(f, f, f, 1.0f); } else if (texture_type == IMAGE_DATA_TYPE_HALF4) { - half4 r = tex_fetch(half4, info, offset); + half4 r = tex_fetch(half4, info, data_offset); return make_float4(r.x, r.y, r.z, r.w); } #endif + /* Byte */ else { - uchar r = tex_fetch(uchar, info, offset); + uchar r = tex_fetch(uchar, info, data_offset); float f = r * (1.0f / 255.0f); return make_float4(f, f, f, 1.0f); } } -ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y) +ccl_device_inline float4 +svm_image_texture_read_2d(KernelGlobals *kg, int id, void *acc, int x, int y) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - /* Wrap */ - if (info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); +#ifdef WITH_NANOVDB + if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && + info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { +#endif + /* Wrap */ + if (info->extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info->width); + y = svm_image_texture_wrap_periodic(y, info->height); + } + else { + x = svm_image_texture_wrap_clamp(x, info->width); + y = svm_image_texture_wrap_clamp(y, info->height); + } +#ifdef WITH_NANOVDB } +#endif - int offset = x + info->width * y; - return svm_image_texture_read(kg, info, id, offset); + return svm_image_texture_read(kg, info, acc, x, y, 0); } -ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z) +ccl_device_inline float4 +svm_image_texture_read_3d(KernelGlobals *kg, int id, void *acc, int x, int y, int z) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - /* Wrap */ - if (info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - z = svm_image_texture_wrap_periodic(z, info->depth); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); - z = svm_image_texture_wrap_clamp(z, info->depth); +#ifdef WITH_NANOVDB + if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && + info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { +#endif + /* Wrap */ + if (info->extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info->width); + y = svm_image_texture_wrap_periodic(y, info->height); + z = svm_image_texture_wrap_periodic(z, info->depth); + } + else { + x = svm_image_texture_wrap_clamp(x, info->width); + y = svm_image_texture_wrap_clamp(y, info->height); + z = svm_image_texture_wrap_clamp(z, info->depth); + } +#ifdef WITH_NANOVDB } +#endif - int offset = x + info->width * y + info->width * info->height * z; - return svm_image_texture_read(kg, info, id, offset); + return svm_image_texture_read(kg, info, acc, x, y, z); } ccl_device_inline float svm_image_texture_frac(float x, int *ix) @@ -170,7 +202,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl svm_image_texture_frac(x * info->width, &ix); svm_image_texture_frac(y * info->height, &iy); - return svm_image_texture_read_2d(kg, id, ix, iy); + return svm_image_texture_read_2d(kg, id, NULL, ix, iy); } else if (info->interpolation == INTERPOLATION_LINEAR) { /* Bilinear interpolation. */ @@ -179,10 +211,10 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); float4 r; - r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy); - r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy); - r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1); - r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1); + r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy); + r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy); + r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy + 1); + r += ty * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy + 1); return r; } else { @@ -200,7 +232,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl for (int y = 0; y < 4; y++) { for (int x = 0; x < 4; x++) { float weight = u[x] * v[y]; - r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1); + r += weight * svm_image_texture_read_2d(kg, id, NULL, ix + x - 1, iy + y - 1); } } return r; @@ -216,102 +248,68 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P P = transform_point(&tfm, P); } - const float x = P.x; - const float y = P.y; - const float z = P.z; - - if (info->extension == EXTENSION_CLIP) { - if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } + float x = P.x; + float y = P.y; + float z = P.z; uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp; #ifdef WITH_NANOVDB - cnanovdb_Vec3F xyz; - xyz.mVec[0] = x; - xyz.mVec[1] = y; - xyz.mVec[2] = z; - - if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { + cnanovdb_readaccessor acc; + if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || + info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { ccl_global cnanovdb_griddata *grid = (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data); - const ccl_global cnanovdb_rootdataF *root = cnanovdb_treedata_rootF( - cnanovdb_griddata_tree(grid)); - - cnanovdb_readaccessor acc; - cnanovdb_readaccessor_init(&acc, root); - - float value; - switch (interpolation) { - case INTERPOLATION_CLOSEST: - value = cnanovdb_sampleF_nearest(&acc, &xyz); - break; - default: - case INTERPOLATION_LINEAR: - value = cnanovdb_sampleF_trilinear(&acc, &xyz); - break; - } - return make_float4(value, value, value, 1.0f); + cnanovdb_readaccessor_init(&acc, cnanovdb_treedata_rootF(cnanovdb_griddata_tree(grid))); } - if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { - ccl_global cnanovdb_griddata *grid = - (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data); - const ccl_global cnanovdb_rootdataF3 *root = cnanovdb_treedata_rootF3( - cnanovdb_griddata_tree(grid)); - - cnanovdb_readaccessor acc; - cnanovdb_readaccessor_init(&acc, root); - - cnanovdb_Vec3F value; - switch (interpolation) { - default: - case INTERPOLATION_LINEAR: - value = cnanovdb_sampleF3_trilinear(&acc, &xyz); - break; - case INTERPOLATION_CLOSEST: - value = cnanovdb_sampleF3_nearest(&acc, &xyz); - break; + else { + if (info->extension == EXTENSION_CLIP) { + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } } - return make_float4(value.mVec[0], value.mVec[1], value.mVec[2], 1.0f); + + x *= info->width; + y *= info->height; + z *= info->depth; } #endif if (interpolation == INTERPOLATION_CLOSEST) { /* Closest interpolation. */ int ix, iy, iz; - svm_image_texture_frac(x * info->width, &ix); - svm_image_texture_frac(y * info->height, &iy); - svm_image_texture_frac(z * info->depth, &iz); + svm_image_texture_frac(x, &ix); + svm_image_texture_frac(y, &iy); + svm_image_texture_frac(z, &iz); - return svm_image_texture_read_3d(kg, id, ix, iy, iz); + return svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz); } else if (interpolation == INTERPOLATION_LINEAR) { - /* Bilinear interpolation. */ + /* Trilinear interpolation. */ int ix, iy, iz; - float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); - float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz); + float tx = svm_image_texture_frac(x - 0.5f, &ix); + float ty = svm_image_texture_frac(y - 0.5f, &iy); + float tz = svm_image_texture_frac(z - 0.5f, &iz); float4 r; - r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz); - r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz); - r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz); - r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz); - - r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1); - r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1); - r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1); - r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1); + r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * + svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz); + r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy, iz); + r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy + 1, iz); + r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy + 1, iz); + + r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz + 1); + r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy, iz + 1); + r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy + 1, iz + 1); + r += tz * ty * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy + 1, iz + 1); return r; } else { - /* Bicubic interpolation. */ + /* Tricubic interpolation. */ int ix, iy, iz; - float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); - float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz); + float tx = svm_image_texture_frac(x - 0.5f, &ix); + float ty = svm_image_texture_frac(y - 0.5f, &iy); + float tz = svm_image_texture_frac(z - 0.5f, &iz); float u[4], v[4], w[4]; SET_CUBIC_SPLINE_WEIGHTS(u, tx); @@ -324,7 +322,8 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P for (int y = 0; y < 4; y++) { for (int x = 0; x < 4; x++) { float weight = u[x] * v[y] * w[z]; - r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1); + r += weight * + svm_image_texture_read_3d(kg, id, &acc, ix + x - 1, iy + y - 1, iz + z - 1); } } } diff --git a/intern/cycles/render/image_vdb.cpp b/intern/cycles/render/image_vdb.cpp index 016bbf7151d..5d0999d5623 100644 --- a/intern/cycles/render/image_vdb.cpp +++ b/intern/cycles/render/image_vdb.cpp @@ -145,8 +145,7 @@ bool VDBImageLoader::load_metadata(ImageMetaData &metadata) } # ifdef WITH_NANOVDB - /* Add small offset for correct sampling between voxels. */ - Transform texture_to_index = transform_translate(0.5f, 0.5f, 0.5f); + Transform texture_to_index = transform_identity(); # else Transform texture_to_index = transform_translate(min.x(), min.y(), min.z()) * transform_scale(dim.x(), dim.y(), dim.z()); diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index a721595667d..fc80fa9696c 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -53,7 +53,7 @@ typedef unsigned short ushort; /* Fixed Bits Types */ #ifdef __KERNEL_OPENCL__ -typedef ulong uint64_t; +typedef unsigned long uint64_t; #endif #ifndef __KERNEL_GPU__ |