Cycles: Fix tricubic sampling with NanoVDB

Volumes using tricubic sampling were producing different results with NanoVDB compared to dense textures. This fixes that by using the same tricubic sampling algorithm in both cases. It also fixes some remaining offset issues and some minor things that broke OpenCL kernel compilation on NVIDIA.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9491
author: Patrick Mours <pmours@nvidia.com> 2020-11-06 17:19:58 +0300
committer: Patrick Mours <pmours@nvidia.com> 2020-11-09 14:37:47 +0300
commit: 118e31a0a995ae4e8845376215d9c35017a8f781 (patch)
tree: 25672cc8396c7b6cb75086b4d2378c4c06d153b5
parent: 92083772e73a68bf74702aceb0fcb759a6b2419c (diff)
6 files changed, 266 insertions, 168 deletions
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index ba7ab43a47a..1848f6059b6 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -48,7 +48,7 @@
 #define ccl_align(n) __attribute__((aligned(n)))
 #define ccl_optional_struct_init
 
-#if __OPENCL_VERSION__ >= 200
+#if __OPENCL_VERSION__ >= 200 && !defined(__NV_CL_C_VERSION)
 #  define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
 #else
 #  define ccl_loop_no_unroll
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index b466b41f456..b97400a443a 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -28,7 +28,6 @@ CCL_NAMESPACE_BEGIN
  * instruction sets. */
 namespace {
 
-template<typename T> struct TextureInterpolator {
 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
   { \
     u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
@@ -38,6 +37,15 @@ template<typename T> struct TextureInterpolator {
   } \
   (void)0
 
+ccl_always_inline float frac(float x, int *ix)
+{
+  int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+  *ix = i;
+  return x - (float)i;
+}
+
+template<typename T> struct TextureInterpolator {
+
   static ccl_always_inline float4 read(float4 r)
   {
     return r;
@@ -106,13 +114,6 @@ template<typename T> struct TextureInterpolator {
     return clamp(x, 0, width - 1);
   }
 
-  static ccl_always_inline float frac(float x, int *ix)
-  {
-    int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
-    *ix = i;
-    return x - (float)i;
-  }
-
   /* ********  2D interpolation ******** */
 
   static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
@@ -370,7 +371,7 @@ template<typename T> struct TextureInterpolator {
   static ccl_never_inline
 #endif
       float4
-      interp_3d_tricubic(const TextureInfo &info, float x, float y, float z)
+      interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
   {
     int width = info.width;
     int height = info.height;
@@ -469,14 +470,16 @@ template<typename T> struct TextureInterpolator {
       case INTERPOLATION_LINEAR:
         return interp_3d_linear(info, x, y, z);
       default:
-        return interp_3d_tricubic(info, x, y, z);
+        return interp_3d_cubic(info, x, y, z);
     }
   }
-#undef SET_CUBIC_SPLINE_WEIGHTS
 };
 
 #ifdef WITH_NANOVDB
 template<typename T> struct NanoVDBInterpolator {
+
+  typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT;
+
   static ccl_always_inline float4 read(float r)
   {
     return make_float4(r, r, r, 1.0f);
@@ -487,26 +490,93 @@ template<typename T> struct NanoVDBInterpolator {
     return make_float4(r[0], r[1], r[2], 1.0f);
   }
 
+  static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float x, float y, float z)
+  {
+    const nanovdb::Vec3f xyz(x, y, z);
+    return read(nanovdb::NearestNeighborSampler<ReadAccessorT, false>(acc)(xyz));
+  }
+
+  static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, float y, float z)
+  {
+    const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
+    return read(nanovdb::TrilinearSampler<ReadAccessorT, false>(acc)(xyz));
+  }
+
+#  if defined(__GNUC__) || defined(__clang__)
+  static ccl_always_inline
+#  else
+  static ccl_never_inline
+#  endif
+      float4
+      interp_3d_cubic(ReadAccessorT acc, float x, float y, float z)
+  {
+    int ix, iy, iz;
+    int nix, niy, niz;
+    int pix, piy, piz;
+    int nnix, nniy, nniz;
+    /* Tricubic b-spline interpolation. */
+    const float tx = frac(x - 0.5f, &ix);
+    const float ty = frac(y - 0.5f, &iy);
+    const float tz = frac(z - 0.5f, &iz);
+    pix = ix - 1;
+    piy = iy - 1;
+    piz = iz - 1;
+    nix = ix + 1;
+    niy = iy + 1;
+    niz = iz + 1;
+    nnix = ix + 2;
+    nniy = iy + 2;
+    nniz = iz + 2;
+
+    const int xc[4] = {pix, ix, nix, nnix};
+    const int yc[4] = {piy, iy, niy, nniy};
+    const int zc[4] = {piz, iz, niz, nniz};
+    float u[4], v[4], w[4];
+
+    /* Some helper macro to keep code reasonable size,
+     * let compiler to inline all the matrix multiplications.
+     */
+#  define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
+#  define COL_TERM(col, row) \
+    (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
+               u[3] * DATA(3, col, row)))
+#  define ROW_TERM(row) \
+    (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
+
+    SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+    SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+    SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+    /* Actual interpolation. */
+    return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#  undef COL_TERM
+#  undef ROW_TERM
+#  undef DATA
+  }
+
   static ccl_always_inline float4
   interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
   {
-    const nanovdb::Vec3f xyz(x, y, z);
-    nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data;
-    const nanovdb::NanoRoot<T> &root = grid->tree().root();
+    using namespace nanovdb;
+
+    NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
+    const NanoRoot<T> &root = grid->tree().root();
 
-    typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT;
     switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
       case INTERPOLATION_CLOSEST:
-        return read(nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz));
+        return interp_3d_closest(root, x, y, z);
       case INTERPOLATION_LINEAR:
-        return read(nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz));
+        return interp_3d_linear(root, x, y, z);
       default:
-        return read(nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz));
+        return interp_3d_cubic(root, x, y, z);
     }
   }
 };
 #endif
 
+#undef SET_CUBIC_SPLINE_WEIGHTS
+
 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
   const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index c2a0ee06dbc..b8aaacba960 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -24,17 +24,14 @@ ccl_device float cubic_w0(float a)
 {
   return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
 }
-
 ccl_device float cubic_w1(float a)
 {
   return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
 }
-
 ccl_device float cubic_w2(float a)
 {
   return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
 }
-
 ccl_device float cubic_w3(float a)
 {
   return (1.0f / 6.0f) * (a * a * a);
@@ -45,7 +42,6 @@ ccl_device float cubic_g0(float a)
 {
   return cubic_w0(a) + cubic_w1(a);
 }
-
 ccl_device float cubic_g1(float a)
 {
   return cubic_w2(a) + cubic_w3(a);
@@ -54,13 +50,11 @@ ccl_device float cubic_g1(float a)
 /* h0 and h1 are the two offset functions */
 ccl_device float cubic_h0(float a)
 {
-  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
-  return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+  return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
 }
-
 ccl_device float cubic_h1(float a)
 {
-  return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+  return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
 }
 
 /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
@@ -79,10 +73,11 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f
 
   float g0x = cubic_g0(fx);
   float g1x = cubic_g1(fx);
-  float x0 = (px + cubic_h0(fx)) / info.width;
-  float x1 = (px + cubic_h1(fx)) / info.width;
-  float y0 = (py + cubic_h0(fy)) / info.height;
-  float y1 = (py + cubic_h1(fy)) / info.height;
+  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+  float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+  float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+  float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+  float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
 
   return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
          cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
@@ -90,7 +85,7 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f
 
 /* Fast tricubic texture lookup using 8 trilinear lookups. */
 template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x, float y, float z)
+ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
 {
   CUtexObject tex = (CUtexObject)info.data;
 
@@ -112,12 +107,13 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x
   float g0z = cubic_g0(fz);
   float g1z = cubic_g1(fz);
 
-  float x0 = (px + cubic_h0(fx)) / info.width;
-  float x1 = (px + cubic_h1(fx)) / info.width;
-  float y0 = (py + cubic_h0(fy)) / info.height;
-  float y1 = (py + cubic_h1(fy)) / info.height;
-  float z0 = (pz + cubic_h0(fz)) / info.depth;
-  float z1 = (pz + cubic_h1(fz)) / info.depth;
+  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+  float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+  float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+  float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+  float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
+  float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
+  float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
 
   return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
                 g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
@@ -126,22 +122,56 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x
 }
 
 #ifdef WITH_NANOVDB
+template<typename T, typename S>
+ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+{
+  float px = floor(x);
+  float py = floor(y);
+  float pz = floor(z);
+  float fx = x - px;
+  float fy = y - py;
+  float fz = z - pz;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  float g0y = cubic_g0(fy);
+  float g1y = cubic_g1(fy);
+  float g0z = cubic_g0(fz);
+  float g1z = cubic_g1(fz);
+
+  float x0 = px + cubic_h0(fx);
+  float x1 = px + cubic_h1(fx);
+  float y0 = py + cubic_h0(fy);
+  float y1 = py + cubic_h1(fy);
+  float z0 = pz + cubic_h0(fz);
+  float z1 = pz + cubic_h1(fz);
+
+  using namespace nanovdb;
+
+  return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
+                g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
+         g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
+                g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
+}
+
 template<typename T>
 ccl_device_inline T kernel_tex_image_interp_nanovdb(
     const TextureInfo &info, float x, float y, float z, uint interpolation)
 {
-  const nanovdb::Vec3f xyz(x, y, z);
-  nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data;
-  const nanovdb::NanoRoot<T> &root = grid->tree().root();
+  using namespace nanovdb;
+  typedef ReadAccessor<NanoRoot<T>> ReadAccessorT;
+
+  NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
+  const NanoRoot<T> &root = grid->tree().root();
 
-  typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT;
   switch (interpolation) {
     case INTERPOLATION_CLOSEST:
-      return nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz);
+      return NearestNeighborSampler<ReadAccessorT, false>(root)(Vec3f(x, y, z));
     case INTERPOLATION_LINEAR:
-      return nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz);
+      return TrilinearSampler<ReadAccessorT, false>(root)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
     default:
-      return nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz);
+      TrilinearSampler<ReadAccessorT, false> s(root);
+      return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
   }
 }
 #endif
@@ -210,7 +240,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
   if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
       texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
     if (interpolation == INTERPOLATION_CUBIC) {
-      return kernel_tex_image_interp_bicubic_3d<float4>(info, x, y, z);
+      return kernel_tex_image_interp_tricubic<float4>(info, x, y, z);
     }
     else {
       CUtexObject tex = (CUtexObject)info.data;
@@ -221,7 +251,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
     float f;
 
     if (interpolation == INTERPOLATION_CUBIC) {
-      f = kernel_tex_image_interp_bicubic_3d<float>(info, x, y, z);
+      f = kernel_tex_image_interp_tricubic<float>(info, x, y, z);
     }
     else {
       CUtexObject tex = (CUtexObject)info.data;
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index cbf9a208112..f39998299ef 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -16,7 +16,6 @@
 
 #ifdef WITH_NANOVDB
 #  include "nanovdb/CNanoVDB.h"
-#  include "nanovdb/util/CSampleFromVoxels.h"
 #endif
 
 /* For OpenCL we do manual lookup and interpolation. */
@@ -47,95 +46,128 @@ ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
   return clamp(x, 0, width - 1);
 }
 
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
-                                                const ccl_global TextureInfo *info,
-                                                int id,
-                                                int offset)
+ccl_device_inline float4 svm_image_texture_read(
+    KernelGlobals *kg, const ccl_global TextureInfo *info, void *acc, int x, int y, int z)
 {
+  const int data_offset = x + info->width * y + info->width * info->height * z;
   const int texture_type = info->data_type;
 
   /* Float4 */
   if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
-    return tex_fetch(float4, info, offset);
+    return tex_fetch(float4, info, data_offset);
   }
   /* Byte4 */
   else if (texture_type == IMAGE_DATA_TYPE_BYTE4) {
-    uchar4 r = tex_fetch(uchar4, info, offset);
+    uchar4 r = tex_fetch(uchar4, info, data_offset);
     float f = 1.0f / 255.0f;
     return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
   }
   /* Ushort4 */
   else if (texture_type == IMAGE_DATA_TYPE_USHORT4) {
-    ushort4 r = tex_fetch(ushort4, info, offset);
+    ushort4 r = tex_fetch(ushort4, info, data_offset);
     float f = 1.0f / 65535.f;
     return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
   }
   /* Float */
   else if (texture_type == IMAGE_DATA_TYPE_FLOAT) {
-    float f = tex_fetch(float, info, offset);
+    float f = tex_fetch(float, info, data_offset);
     return make_float4(f, f, f, 1.0f);
   }
   /* UShort */
   else if (texture_type == IMAGE_DATA_TYPE_USHORT) {
-    ushort r = tex_fetch(ushort, info, offset);
+    ushort r = tex_fetch(ushort, info, data_offset);
     float f = r * (1.0f / 65535.0f);
     return make_float4(f, f, f, 1.0f);
   }
-  /* Byte */
+#ifdef WITH_NANOVDB
+  /* NanoVDB Float */
+  else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
+    cnanovdb_coord coord;
+    coord.mVec[0] = x;
+    coord.mVec[1] = y;
+    coord.mVec[2] = z;
+    float f = cnanovdb_readaccessor_getValueF((cnanovdb_readaccessor *)acc, &coord);
+    return make_float4(f, f, f, 1.0f);
+  }
+  /* NanoVDB Float3 */
+  else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+    cnanovdb_coord coord;
+    coord.mVec[0] = x;
+    coord.mVec[1] = y;
+    coord.mVec[2] = z;
+    cnanovdb_Vec3F f = cnanovdb_readaccessor_getValueF3((cnanovdb_readaccessor *)acc, &coord);
+    return make_float4(f.mVec[0], f.mVec[1], f.mVec[2], 1.0f);
+  }
+#endif
 #ifdef __KERNEL_CL_KHR_FP16__
-  /* half and half4 are optional in OpenCL */
+  /* Half and Half4 are optional in OpenCL */
   else if (texture_type == IMAGE_DATA_TYPE_HALF) {
-    float f = tex_fetch(half, info, offset);
+    float f = tex_fetch(half, info, data_offset);
     return make_float4(f, f, f, 1.0f);
   }
   else if (texture_type == IMAGE_DATA_TYPE_HALF4) {
-    half4 r = tex_fetch(half4, info, offset);
+    half4 r = tex_fetch(half4, info, data_offset);
     return make_float4(r.x, r.y, r.z, r.w);
   }
 #endif
+  /* Byte */
   else {
-    uchar r = tex_fetch(uchar, info, offset);
+    uchar r = tex_fetch(uchar, info, data_offset);
     float f = r * (1.0f / 255.0f);
     return make_float4(f, f, f, 1.0f);
   }
 }
 
-ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y)
+ccl_device_inline float4
+svm_image_texture_read_2d(KernelGlobals *kg, int id, void *acc, int x, int y)
 {
   const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
 
-  /* Wrap */
-  if (info->extension == EXTENSION_REPEAT) {
-    x = svm_image_texture_wrap_periodic(x, info->width);
-    y = svm_image_texture_wrap_periodic(y, info->height);
-  }
-  else {
-    x = svm_image_texture_wrap_clamp(x, info->width);
-    y = svm_image_texture_wrap_clamp(y, info->height);
+#ifdef WITH_NANOVDB
+  if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
+      info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+#endif
+    /* Wrap */
+    if (info->extension == EXTENSION_REPEAT) {
+      x = svm_image_texture_wrap_periodic(x, info->width);
+      y = svm_image_texture_wrap_periodic(y, info->height);
+    }
+    else {
+      x = svm_image_texture_wrap_clamp(x, info->width);
+      y = svm_image_texture_wrap_clamp(y, info->height);
+    }
+#ifdef WITH_NANOVDB
   }
+#endif
 
-  int offset = x + info->width * y;
-  return svm_image_texture_read(kg, info, id, offset);
+  return svm_image_texture_read(kg, info, acc, x, y, 0);
 }
 
-ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z)
+ccl_device_inline float4
+svm_image_texture_read_3d(KernelGlobals *kg, int id, void *acc, int x, int y, int z)
 {
   const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
 
-  /* Wrap */
-  if (info->extension == EXTENSION_REPEAT) {
-    x = svm_image_texture_wrap_periodic(x, info->width);
-    y = svm_image_texture_wrap_periodic(y, info->height);
-    z = svm_image_texture_wrap_periodic(z, info->depth);
-  }
-  else {
-    x = svm_image_texture_wrap_clamp(x, info->width);
-    y = svm_image_texture_wrap_clamp(y, info->height);
-    z = svm_image_texture_wrap_clamp(z, info->depth);
+#ifdef WITH_NANOVDB
+  if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
+      info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+#endif
+    /* Wrap */
+    if (info->extension == EXTENSION_REPEAT) {
+      x = svm_image_texture_wrap_periodic(x, info->width);
+      y = svm_image_texture_wrap_periodic(y, info->height);
+      z = svm_image_texture_wrap_periodic(z, info->depth);
+    }
+    else {
+      x = svm_image_texture_wrap_clamp(x, info->width);
+      y = svm_image_texture_wrap_clamp(y, info->height);
+      z = svm_image_texture_wrap_clamp(z, info->depth);
+    }
+#ifdef WITH_NANOVDB
   }
+#endif
 
-  int offset = x + info->width * y + info->width * info->height * z;
-  return svm_image_texture_read(kg, info, id, offset);
+  return svm_image_texture_read(kg, info, acc, x, y, z);
 }
 
 ccl_device_inline float svm_image_texture_frac(float x, int *ix)
@@ -170,7 +202,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
     svm_image_texture_frac(x * info->width, &ix);
     svm_image_texture_frac(y * info->height, &iy);
 
-    return svm_image_texture_read_2d(kg, id, ix, iy);
+    return svm_image_texture_read_2d(kg, id, NULL, ix, iy);
   }
   else if (info->interpolation == INTERPOLATION_LINEAR) {
     /* Bilinear interpolation. */
@@ -179,10 +211,10 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
     float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
 
     float4 r;
-    r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy);
-    r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy);
-    r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1);
-    r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1);
+    r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy);
+    r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy);
+    r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy + 1);
+    r += ty * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy + 1);
     return r;
   }
   else {
@@ -200,7 +232,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
     for (int y = 0; y < 4; y++) {
       for (int x = 0; x < 4; x++) {
         float weight = u[x] * v[y];
-        r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1);
+        r += weight * svm_image_texture_read_2d(kg, id, NULL, ix + x - 1, iy + y - 1);
       }
     }
     return r;
@@ -216,102 +248,68 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P
     P = transform_point(&tfm, P);
   }
 
-  const float x = P.x;
-  const float y = P.y;
-  const float z = P.z;
-
-  if (info->extension == EXTENSION_CLIP) {
-    if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-    }
-  }
+  float x = P.x;
+  float y = P.y;
+  float z = P.z;
 
   uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp;
 
 #ifdef WITH_NANOVDB
-  cnanovdb_Vec3F xyz;
-  xyz.mVec[0] = x;
-  xyz.mVec[1] = y;
-  xyz.mVec[2] = z;
-
-  if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
+  cnanovdb_readaccessor acc;
+  if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT ||
+      info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
     ccl_global cnanovdb_griddata *grid =
         (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data);
-    const ccl_global cnanovdb_rootdataF *root = cnanovdb_treedata_rootF(
-        cnanovdb_griddata_tree(grid));
-
-    cnanovdb_readaccessor acc;
-    cnanovdb_readaccessor_init(&acc, root);
-
-    float value;
-    switch (interpolation) {
-      case INTERPOLATION_CLOSEST:
-        value = cnanovdb_sampleF_nearest(&acc, &xyz);
-        break;
-      default:
-      case INTERPOLATION_LINEAR:
-        value = cnanovdb_sampleF_trilinear(&acc, &xyz);
-        break;
-    }
-    return make_float4(value, value, value, 1.0f);
+    cnanovdb_readaccessor_init(&acc, cnanovdb_treedata_rootF(cnanovdb_griddata_tree(grid)));
   }
-  if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-    ccl_global cnanovdb_griddata *grid =
-        (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data);
-    const ccl_global cnanovdb_rootdataF3 *root = cnanovdb_treedata_rootF3(
-        cnanovdb_griddata_tree(grid));
-
-    cnanovdb_readaccessor acc;
-    cnanovdb_readaccessor_init(&acc, root);
-
-    cnanovdb_Vec3F value;
-    switch (interpolation) {
-      default:
-      case INTERPOLATION_LINEAR:
-        value = cnanovdb_sampleF3_trilinear(&acc, &xyz);
-        break;
-      case INTERPOLATION_CLOSEST:
-        value = cnanovdb_sampleF3_nearest(&acc, &xyz);
-        break;
+  else {
+    if (info->extension == EXTENSION_CLIP) {
+      if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+      }
     }
-    return make_float4(value.mVec[0], value.mVec[1], value.mVec[2], 1.0f);
+
+    x *= info->width;
+    y *= info->height;
+    z *= info->depth;
   }
 #endif
 
   if (interpolation == INTERPOLATION_CLOSEST) {
     /* Closest interpolation. */
     int ix, iy, iz;
-    svm_image_texture_frac(x * info->width, &ix);
-    svm_image_texture_frac(y * info->height, &iy);
-    svm_image_texture_frac(z * info->depth, &iz);
+    svm_image_texture_frac(x, &ix);
+    svm_image_texture_frac(y, &iy);
+    svm_image_texture_frac(z, &iz);
 
-    return svm_image_texture_read_3d(kg, id, ix, iy, iz);
+    return svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz);
   }
   else if (interpolation == INTERPOLATION_LINEAR) {
-    /* Bilinear interpolation. */
+    /* Trilinear interpolation. */
     int ix, iy, iz;
-    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
-    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
-    float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+    float tx = svm_image_texture_frac(x - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y - 0.5f, &iy);
+    float tz = svm_image_texture_frac(z - 0.5f, &iz);
 
     float4 r;
-    r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz);
-    r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz);
-    r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz);
-    r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz);
-
-    r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1);
-    r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1);
-    r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1);
-    r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1);
+    r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+        svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz);
+    r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy, iz);
+    r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy + 1, iz);
+    r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy + 1, iz);
+
+    r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy, iz + 1);
+    r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy, iz + 1);
+    r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, &acc, ix, iy + 1, iz + 1);
+    r += tz * ty * tx * svm_image_texture_read_3d(kg, id, &acc, ix + 1, iy + 1, iz + 1);
     return r;
   }
   else {
-    /* Bicubic interpolation. */
+    /* Tricubic interpolation. */
     int ix, iy, iz;
-    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
-    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
-    float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+    float tx = svm_image_texture_frac(x - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y - 0.5f, &iy);
+    float tz = svm_image_texture_frac(z - 0.5f, &iz);
 
     float u[4], v[4], w[4];
     SET_CUBIC_SPLINE_WEIGHTS(u, tx);
@@ -324,7 +322,8 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P
       for (int y = 0; y < 4; y++) {
         for (int x = 0; x < 4; x++) {
           float weight = u[x] * v[y] * w[z];
-          r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1);
+          r += weight *
+               svm_image_texture_read_3d(kg, id, &acc, ix + x - 1, iy + y - 1, iz + z - 1);
         }
       }
     }
diff --git a/intern/cycles/render/image_vdb.cpp b/intern/cycles/render/image_vdb.cpp
index 016bbf7151d..5d0999d5623 100644
--- a/intern/cycles/render/image_vdb.cpp
+++ b/intern/cycles/render/image_vdb.cpp
@@ -145,8 +145,7 @@ bool VDBImageLoader::load_metadata(ImageMetaData &metadata)
   }
 
 #  ifdef WITH_NANOVDB
-  /* Add small offset for correct sampling between voxels. */
-  Transform texture_to_index = transform_translate(0.5f, 0.5f, 0.5f);
+  Transform texture_to_index = transform_identity();
 #  else
   Transform texture_to_index = transform_translate(min.x(), min.y(), min.z()) *
                                transform_scale(dim.x(), dim.y(), dim.z());
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index a721595667d..fc80fa9696c 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -53,7 +53,7 @@ typedef unsigned short ushort;
 /* Fixed Bits Types */
 
 #ifdef __KERNEL_OPENCL__
-typedef ulong uint64_t;
+typedef unsigned long uint64_t;
 #endif
 
 #ifndef __KERNEL_GPU__
author	Patrick Mours <pmours@nvidia.com>	2020-11-06 17:19:58 +0300
committer	Patrick Mours <pmours@nvidia.com>	2020-11-09 14:37:47 +0300
commit	118e31a0a995ae4e8845376215d9c35017a8f781 (patch)
tree	25672cc8396c7b6cb75086b4d2378c4c06d153b5
parent	92083772e73a68bf74702aceb0fcb759a6b2419c (diff)