Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Mours <pmours@nvidia.com> 2020-11-06 17:19:58 +0300
committer: Patrick Mours <pmours@nvidia.com> 2020-11-09 14:37:47 +0300
commit: 118e31a0a995ae4e8845376215d9c35017a8f781 (patch)
tree: 25672cc8396c7b6cb75086b4d2378c4c06d153b5 /intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
parent: 92083772e73a68bf74702aceb0fcb759a6b2419c (diff)
Cycles: Fix tricubic sampling with NanoVDB
Volumes using tricubic sampling were producing different results with NanoVDB compared to dense textures. This fixes that by using the same tricubic sampling algorithm in both cases. It also fixes some remaining offset issues and some minor things that broke OpenCL kernel compilation on NVIDIA. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9491
Diffstat (limited to 'intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h')
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h 86
1 files changed, 58 insertions, 28 deletions
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index c2a0ee06dbc..b8aaacba960 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -24,17 +24,14 @@ ccl_device float cubic_w0(float a)
{
return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
}
-
ccl_device float cubic_w1(float a)
{
return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
}
-
ccl_device float cubic_w2(float a)
{
return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
}
-
ccl_device float cubic_w3(float a)
{
return (1.0f / 6.0f) * (a * a * a);
@@ -45,7 +42,6 @@ ccl_device float cubic_g0(float a)
{
return cubic_w0(a) + cubic_w1(a);
}
-
ccl_device float cubic_g1(float a)
{
return cubic_w2(a) + cubic_w3(a);
@@ -54,13 +50,11 @@ ccl_device float cubic_g1(float a)
/* h0 and h1 are the two offset functions */
ccl_device float cubic_h0(float a)
{
- /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
- return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+ return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
}
-
ccl_device float cubic_h1(float a)
{
- return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+ return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
}
/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
@@ -79,10 +73,11 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f
float g0x = cubic_g0(fx);
float g1x = cubic_g1(fx);
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
+ /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+ float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+ float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+ float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+ float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
@@ -90,7 +85,7 @@ ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, f
/* Fast tricubic texture lookup using 8 trilinear lookups. */
template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x, float y, float z)
+ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
{
CUtexObject tex = (CUtexObject)info.data;
@@ -112,12 +107,13 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x
float g0z = cubic_g0(fz);
float g1z = cubic_g1(fz);
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
- float z0 = (pz + cubic_h0(fz)) / info.depth;
- float z1 = (pz + cubic_h1(fz)) / info.depth;
+ /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+ float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+ float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+ float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+ float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
+ float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
+ float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
@@ -126,22 +122,56 @@ ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x
}
#ifdef WITH_NANOVDB
+template<typename T, typename S>
+ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+{
+ float px = floor(x);
+ float py = floor(y);
+ float pz = floor(z);
+ float fx = x - px;
+ float fy = y - py;
+ float fz = z - pz;
+
+ float g0x = cubic_g0(fx);
+ float g1x = cubic_g1(fx);
+ float g0y = cubic_g0(fy);
+ float g1y = cubic_g1(fy);
+ float g0z = cubic_g0(fz);
+ float g1z = cubic_g1(fz);
+
+ float x0 = px + cubic_h0(fx);
+ float x1 = px + cubic_h1(fx);
+ float y0 = py + cubic_h0(fy);
+ float y1 = py + cubic_h1(fy);
+ float z0 = pz + cubic_h0(fz);
+ float z1 = pz + cubic_h1(fz);
+
+ using namespace nanovdb;
+
+ return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
+ g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
+ g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
+ g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
+}
+
template<typename T>
ccl_device_inline T kernel_tex_image_interp_nanovdb(
const TextureInfo &info, float x, float y, float z, uint interpolation)
{
- const nanovdb::Vec3f xyz(x, y, z);
- nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data;
- const nanovdb::NanoRoot<T> &root = grid->tree().root();
+ using namespace nanovdb;
+ typedef ReadAccessor<NanoRoot<T>> ReadAccessorT;
+
+ NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
+ const NanoRoot<T> &root = grid->tree().root();
- typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT;
switch (interpolation) {
case INTERPOLATION_CLOSEST:
- return nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz);
+ return NearestNeighborSampler<ReadAccessorT, false>(root)(Vec3f(x, y, z));
case INTERPOLATION_LINEAR:
- return nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz);
+ return TrilinearSampler<ReadAccessorT, false>(root)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
default:
- return nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz);
+ TrilinearSampler<ReadAccessorT, false> s(root);
+ return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
}
}
#endif
@@ -210,7 +240,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
if (interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic_3d<float4>(info, x, y, z);
+ return kernel_tex_image_interp_tricubic<float4>(info, x, y, z);
}
else {
CUtexObject tex = (CUtexObject)info.data;
@@ -221,7 +251,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
float f;
if (interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic_3d<float>(info, x, y, z);
+ f = kernel_tex_image_interp_tricubic<float>(info, x, y, z);
}
else {
CUtexObject tex = (CUtexObject)info.data;