Cycles: merge of cycles-x branch, a major update to the renderer

This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T80267, T77185, T69800
author: Brecht Van Lommel <brecht@blender.org> 2021-09-20 18:59:20 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-09-21 15:55:54 +0300
commit: 08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree: 6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/device/gpu/image.h
parent: fa6b1007bad065440950cd67deb16a04f368856f (diff)
1 files changed, 278 insertions, 0 deletions
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h
new file mode 100644
index 00000000000..b015c78a8f5
--- /dev/null
+++ b/intern/cycles/kernel/device/gpu/image.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef WITH_NANOVDB
+#  define NDEBUG /* Disable "assert" in device code */
+#  define NANOVDB_USE_INTRINSICS
+#  include "nanovdb/NanoVDB.h"
+#  include "nanovdb/util/SampleFromVoxels.h"
+#endif
+
+/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
+ccl_device float cubic_w0(float a)
+{
+  return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
+}
+ccl_device float cubic_w1(float a)
+{
+  return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
+}
+ccl_device float cubic_w2(float a)
+{
+  return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
+}
+ccl_device float cubic_w3(float a)
+{
+  return (1.0f / 6.0f) * (a * a * a);
+}
+
+/* g0 and g1 are the two amplitude functions. */
+ccl_device float cubic_g0(float a)
+{
+  return cubic_w0(a) + cubic_w1(a);
+}
+ccl_device float cubic_g1(float a)
+{
+  return cubic_w2(a) + cubic_w3(a);
+}
+
+/* h0 and h1 are the two offset functions */
+ccl_device float cubic_h0(float a)
+{
+  return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
+}
+ccl_device float cubic_h1(float a)
+{
+  return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
+}
+
+/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
+template<typename T>
+ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y)
+{
+  ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+
+  x = (x * info.width) - 0.5f;
+  y = (y * info.height) - 0.5f;
+
+  float px = floorf(x);
+  float py = floorf(y);
+  float fx = x - px;
+  float fy = y - py;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+  float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+  float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+  float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+  float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
+
+  return cubic_g0(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y0) +
+                         g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y0)) +
+         cubic_g1(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y1) +
+                         g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y1));
+}
+
+/* Fast tricubic texture lookup using 8 trilinear lookups. */
+template<typename T>
+ccl_device_noinline T
+kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
+{
+  ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+
+  x = (x * info.width) - 0.5f;
+  y = (y * info.height) - 0.5f;
+  z = (z * info.depth) - 0.5f;
+
+  float px = floorf(x);
+  float py = floorf(y);
+  float pz = floorf(z);
+  float fx = x - px;
+  float fy = y - py;
+  float fz = z - pz;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  float g0y = cubic_g0(fy);
+  float g1y = cubic_g1(fy);
+  float g0z = cubic_g0(fz);
+  float g1z = cubic_g1(fz);
+
+  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+  float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
+  float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
+  float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
+  float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
+  float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
+  float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
+
+  return g0z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z0) +
+                       g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z0)) +
+                g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z0) +
+                       g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z0))) +
+         g1z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z1) +
+                       g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z1)) +
+                g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z1) +
+                       g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z1)));
+}
+
+#ifdef WITH_NANOVDB
+template<typename T, typename S>
+ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+{
+  float px = floorf(x);
+  float py = floorf(y);
+  float pz = floorf(z);
+  float fx = x - px;
+  float fy = y - py;
+  float fz = z - pz;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  float g0y = cubic_g0(fy);
+  float g1y = cubic_g1(fy);
+  float g0z = cubic_g0(fz);
+  float g1z = cubic_g1(fz);
+
+  float x0 = px + cubic_h0(fx);
+  float x1 = px + cubic_h1(fx);
+  float y0 = py + cubic_h0(fy);
+  float y1 = py + cubic_h1(fy);
+  float z0 = pz + cubic_h0(fz);
+  float z1 = pz + cubic_h1(fz);
+
+  using namespace nanovdb;
+
+  return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
+                g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
+         g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
+                g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
+}
+
+template<typename T>
+ccl_device_noinline T kernel_tex_image_interp_nanovdb(
+    const TextureInfo &info, float x, float y, float z, uint interpolation)
+{
+  using namespace nanovdb;
+
+  NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
+  typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
+  AccessorType acc = grid->getAccessor();
+
+  switch (interpolation) {
+    case INTERPOLATION_CLOSEST:
+      return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z));
+    case INTERPOLATION_LINEAR:
+      return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
+    default:
+      SampleFromVoxels<AccessorType, 1, false> s(acc);
+      return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
+  }
+}
+#endif
+
+ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y)
+{
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+  /* float4, byte4, ushort4 and half4 */
+  const int texture_type = info.data_type;
+  if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+      texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+    if (info.interpolation == INTERPOLATION_CUBIC) {
+      return kernel_tex_image_interp_bicubic<float4>(info, x, y);
+    }
+    else {
+      ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+      return ccl_gpu_tex_object_read_2D<float4>(tex, x, y);
+    }
+  }
+  /* float, byte and half */
+  else {
+    float f;
+
+    if (info.interpolation == INTERPOLATION_CUBIC) {
+      f = kernel_tex_image_interp_bicubic<float>(info, x, y);
+    }
+    else {
+      ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+      f = ccl_gpu_tex_object_read_2D<float>(tex, x, y);
+    }
+
+    return make_float4(f, f, f, 1.0f);
+  }
+}
+
+ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg,
+                                             int id,
+                                             float3 P,
+                                             InterpolationType interp)
+{
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+  if (info.use_transform_3d) {
+    P = transform_point(&info.transform_3d, P);
+  }
+
+  const float x = P.x;
+  const float y = P.y;
+  const float z = P.z;
+
+  uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
+  const int texture_type = info.data_type;
+
+#ifdef WITH_NANOVDB
+  if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
+    float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation);
+    return make_float4(f, f, f, 1.0f);
+  }
+  if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+    nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>(
+        info, x, y, z, interpolation);
+    return make_float4(f[0], f[1], f[2], 1.0f);
+  }
+#endif
+  if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+      texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+    if (interpolation == INTERPOLATION_CUBIC) {
+      return kernel_tex_image_interp_tricubic<float4>(info, x, y, z);
+    }
+    else {
+      ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+      return ccl_gpu_tex_object_read_3D<float4>(tex, x, y, z);
+    }
+  }
+  else {
+    float f;
+
+    if (interpolation == INTERPOLATION_CUBIC) {
+      f = kernel_tex_image_interp_tricubic<float>(info, x, y, z);
+    }
+    else {
+      ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
+      f = ccl_gpu_tex_object_read_3D<float>(tex, x, y, z);
+    }
+
+    return make_float4(f, f, f, 1.0f);
+  }
+}
+
+CCL_NAMESPACE_END
author	Brecht Van Lommel <brecht@blender.org>	2021-09-20 18:59:20 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-09-21 15:55:54 +0300
commit	08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree	6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/device/gpu/image.h
parent	fa6b1007bad065440950cd67deb16a04f368856f (diff)