Cycles: merge of cycles-x branch, a major update to the renderer

This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. This includes the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes, since intermediate changes would often fail to build or pass tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T80267, T77185, T69800
author: Brecht Van Lommel <brecht@blender.org> 2021-09-20 18:59:20 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-09-21 15:55:54 +0300
commit: 08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree: 6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/kernel_jitter.h
parent: fa6b1007bad065440950cd67deb16a04f368856f (diff)
1 files changed, 94 insertions, 158 deletions
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index f4e60a807f7..354e8115538 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -14,93 +14,27 @@
  * limitations under the License.
  */
 
-/* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
-
+#pragma once
 CCL_NAMESPACE_BEGIN
 
-/* "Correlated Multi-Jittered Sampling"
- * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
-
-/* TODO: find good value, suggested 64 gives pattern on cornell box ceiling. */
-#define CMJ_RANDOM_OFFSET_LIMIT 4096
-
-ccl_device_inline bool cmj_is_pow2(int i)
+ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed)
 {
-  return (i > 1) && ((i & (i - 1)) == 0);
-}
+  x += seed;
+  x ^= (x * 0x6c50b47cu);
+  x ^= x * 0xb82f1e52u;
+  x ^= x * 0xc7afe638u;
+  x ^= x * 0x8d22f6e6u;
 
-ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
-{
-  return (a & (b - 1));
+  return x;
 }
 
-/* b must be > 1 */
-ccl_device_inline int cmj_fast_div_pow2(int a, int b)
+ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
 {
-  kernel_assert(b > 1);
-  return a >> count_trailing_zeros(b);
-}
+  x = reverse_integer_bits(x);
+  x = laine_karras_permutation(x, seed);
+  x = reverse_integer_bits(x);
 
-ccl_device_inline uint cmj_w_mask(uint w)
-{
-  kernel_assert(w > 1);
-  return ((1 << (32 - count_leading_zeros(w))) - 1);
-}
-
-ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
-{
-  uint w = l - 1;
-
-  if ((l & w) == 0) {
-    /* l is a power of two (fast) */
-    i ^= p;
-    i *= 0xe170893d;
-    i ^= p >> 16;
-    i ^= (i & w) >> 4;
-    i ^= p >> 8;
-    i *= 0x0929eb3f;
-    i ^= p >> 23;
-    i ^= (i & w) >> 1;
-    i *= 1 | p >> 27;
-    i *= 0x6935fa69;
-    i ^= (i & w) >> 11;
-    i *= 0x74dcb303;
-    i ^= (i & w) >> 2;
-    i *= 0x9e501cc3;
-    i ^= (i & w) >> 2;
-    i *= 0xc860a3df;
-    i &= w;
-    i ^= i >> 5;
-
-    return (i + p) & w;
-  }
-  else {
-    /* l is not a power of two (slow) */
-    w = cmj_w_mask(w);
-
-    do {
-      i ^= p;
-      i *= 0xe170893d;
-      i ^= p >> 16;
-      i ^= (i & w) >> 4;
-      i ^= p >> 8;
-      i *= 0x0929eb3f;
-      i ^= p >> 23;
-      i ^= (i & w) >> 1;
-      i *= 1 | p >> 27;
-      i *= 0x6935fa69;
-      i ^= (i & w) >> 11;
-      i *= 0x74dcb303;
-      i ^= (i & w) >> 2;
-      i *= 0x9e501cc3;
-      i ^= (i & w) >> 2;
-      i *= 0xc860a3df;
-      i &= w;
-      i ^= i >> 5;
-    } while (i >= l);
-
-    return (i + p) % l;
-  }
+  return x;
 }
 
 ccl_device_inline uint cmj_hash(uint i, uint p)
@@ -133,99 +67,101 @@ ccl_device_inline float cmj_randfloat(uint i, uint p)
   return cmj_hash(i, p) * (1.0f / 4294967808.0f);
 }
 
-#ifdef __CMJ__
-ccl_device float cmj_sample_1D(int s, int N, int p)
+ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
 {
-  kernel_assert(s < N);
-
-  uint x = cmj_permute(s, N, p * 0x68bc21eb);
-  float jx = cmj_randfloat(s, p * 0x967a889b);
-
-  float invN = 1.0f / N;
-  return (x + jx) * invN;
+  return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
 }
 
-/* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
-ccl_device_inline int cmj_isqrt(int value)
+ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
 {
-#  if defined(__KERNEL_CUDA__)
-  return float_to_int(__fsqrt_ru(value));
-#  elif defined(__KERNEL_GPU__)
-  return float_to_int(sqrtf(value));
-#  else
-  /* This is a work around for fast-math on CPU which might replace sqrtf()
-   * with am approximated version.
-   */
-  return float_to_int(sqrtf(value) + 1e-6f);
-#  endif
-}
+  /* Each PMJ sample set stores NUM_PMJ_SAMPLES (x, y) pairs, so for 1D only
+   * the x part is used as the sample (TODO(@leesonw): Add using both x and y parts
+   * independently). */
+
+  /* Perform Owen shuffle of the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
 
-ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
-{
-  kernel_assert(s < N);
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
+
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
 
-  int m = cmj_isqrt(N);
-  int n = (N - 1) / m + 1;
-  float invN = 1.0f / N;
-  float invm = 1.0f / m;
-  float invn = 1.0f / n;
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+#  else
+  /* Only jitter within the grid interval. */
+  float dx = cmj_randfloat(d, rng_hash);
+#  endif
+  fx = fx + dx * (1.0f / NUM_PMJ_SAMPLES);
+  fx = fx - floorf(fx);
 
-  s = cmj_permute(s, N, p * 0x51633e2d);
+#else
+#  warning "Not using Cranley-Patterson Rotation."
+#endif
 
-  int sdivm, smodm;
+  return fx;
+}
 
-  if (cmj_is_pow2(m)) {
-    sdivm = cmj_fast_div_pow2(s, m);
-    smodm = cmj_fast_mod_pow2(s, m);
-  }
-  else {
-    /* Doing `s * inmv` gives precision issues here. */
-    sdivm = s / m;
-    smodm = s - sdivm * m;
-  }
+ccl_device void pmj_sample_2D(
+    const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension, float *x, float *y)
+{
+  /* Perform a shuffle on the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
 
-  uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
-  uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
 
-  float jx = cmj_randfloat(s, p * 0x967a889b);
-  float jy = cmj_randfloat(s, p * 0x368cc8b7);
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
+  float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1);
 
-  *fx = (sx + (sy + jx) * invn) * invm;
-  *fy = (s + jy) * invN;
-}
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+  float dy = cmj_randfloat_simple(d + 1, rng_hash);
+#  else
+  float dx = cmj_randfloat(d, rng_hash);
+  float dy = cmj_randfloat(d + 1, rng_hash);
+#  endif
+  /* Only jitter within the grid cells. */
+  fx = fx + dx * (1.0f / NUM_PMJ_DIVISIONS);
+  fy = fy + dy * (1.0f / NUM_PMJ_DIVISIONS);
+  fx = fx - floorf(fx);
+  fy = fy - floorf(fy);
+#else
+#  warning "Not using Cranley Patterson Rotation."
 #endif
 
-ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
-{
-  /* Fallback to random */
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    return cmj_randfloat(sample, p);
-  }
-  else {
-    const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
-  }
-}
-
-ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
-{
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    const float fx = cmj_randfloat(sample, p);
-    const float fy = cmj_randfloat(sample, p + 1);
-    return make_float2(fx, fy);
-  }
-  else {
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
-    const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
-    const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
-                     1.0f;
-    return make_float2(fx, fy);
-  }
+  (*x) = fx;
+  (*y) = fy;
 }
 
 CCL_NAMESPACE_END
author	Brecht Van Lommel <brecht@blender.org>	2021-09-20 18:59:20 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-09-21 15:55:54 +0300
commit	08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree	6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/kernel_jitter.h
parent	fa6b1007bad065440950cd67deb16a04f368856f (diff)