1 files changed, 94 insertions, 158 deletions
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index f4e60a807f7..354e8115538 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -14,93 +14,27 @@
  * limitations under the License.
  */
 
-/* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
-
+#pragma once
 CCL_NAMESPACE_BEGIN
 
-/* "Correlated Multi-Jittered Sampling"
- * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
-
-/* TODO: find good value, suggested 64 gives pattern on cornell box ceiling. */
-#define CMJ_RANDOM_OFFSET_LIMIT 4096
-
-ccl_device_inline bool cmj_is_pow2(int i)
+ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed)
 {
-  return (i > 1) && ((i & (i - 1)) == 0);
-}
+  x += seed;
+  x ^= (x * 0x6c50b47cu);
+  x ^= x * 0xb82f1e52u;
+  x ^= x * 0xc7afe638u;
+  x ^= x * 0x8d22f6e6u;
 
-ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
-{
-  return (a & (b - 1));
+  return x;
 }
 
-/* b must be > 1 */
-ccl_device_inline int cmj_fast_div_pow2(int a, int b)
+ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
 {
-  kernel_assert(b > 1);
-  return a >> count_trailing_zeros(b);
-}
+  x = reverse_integer_bits(x);
+  x = laine_karras_permutation(x, seed);
+  x = reverse_integer_bits(x);
 
-ccl_device_inline uint cmj_w_mask(uint w)
-{
-  kernel_assert(w > 1);
-  return ((1 << (32 - count_leading_zeros(w))) - 1);
-}
-
-ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
-{
-  uint w = l - 1;
-
-  if ((l & w) == 0) {
-    /* l is a power of two (fast) */
-    i ^= p;
-    i *= 0xe170893d;
-    i ^= p >> 16;
-    i ^= (i & w) >> 4;
-    i ^= p >> 8;
-    i *= 0x0929eb3f;
-    i ^= p >> 23;
-    i ^= (i & w) >> 1;
-    i *= 1 | p >> 27;
-    i *= 0x6935fa69;
-    i ^= (i & w) >> 11;
-    i *= 0x74dcb303;
-    i ^= (i & w) >> 2;
-    i *= 0x9e501cc3;
-    i ^= (i & w) >> 2;
-    i *= 0xc860a3df;
-    i &= w;
-    i ^= i >> 5;
-
-    return (i + p) & w;
-  }
-  else {
-    /* l is not a power of two (slow) */
-    w = cmj_w_mask(w);
-
-    do {
-      i ^= p;
-      i *= 0xe170893d;
-      i ^= p >> 16;
-      i ^= (i & w) >> 4;
-      i ^= p >> 8;
-      i *= 0x0929eb3f;
-      i ^= p >> 23;
-      i ^= (i & w) >> 1;
-      i *= 1 | p >> 27;
-      i *= 0x6935fa69;
-      i ^= (i & w) >> 11;
-      i *= 0x74dcb303;
-      i ^= (i & w) >> 2;
-      i *= 0x9e501cc3;
-      i ^= (i & w) >> 2;
-      i *= 0xc860a3df;
-      i &= w;
-      i ^= i >> 5;
-    } while (i >= l);
-
-    return (i + p) % l;
-  }
+  return x;
 }
 
 ccl_device_inline uint cmj_hash(uint i, uint p)
@@ -133,99 +67,101 @@ ccl_device_inline float cmj_randfloat(uint i, uint p)
   return cmj_hash(i, p) * (1.0f / 4294967808.0f);
 }
 
-#ifdef __CMJ__
-ccl_device float cmj_sample_1D(int s, int N, int p)
+ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
 {
-  kernel_assert(s < N);
-
-  uint x = cmj_permute(s, N, p * 0x68bc21eb);
-  float jx = cmj_randfloat(s, p * 0x967a889b);
-
-  float invN = 1.0f / N;
-  return (x + jx) * invN;
+  return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
 }
 
-/* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
-ccl_device_inline int cmj_isqrt(int value)
+ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
 {
-#  if defined(__KERNEL_CUDA__)
-  return float_to_int(__fsqrt_ru(value));
-#  elif defined(__KERNEL_GPU__)
-  return float_to_int(sqrtf(value));
-#  else
-  /* This is a work around for fast-math on CPU which might replace sqrtf()
-   * with am approximated version.
-   */
-  return float_to_int(sqrtf(value) + 1e-6f);
-#  endif
-}
+  /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
+   *  the x part is used as the sample (TODO(@leesonw): Add using both x and y parts
+   * independently). */
+
+  /* Perform Owen shuffle of the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
 
-ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
-{
-  kernel_assert(s < N);
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
+
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
 
-  int m = cmj_isqrt(N);
-  int n = (N - 1) / m + 1;
-  float invN = 1.0f / N;
-  float invm = 1.0f / m;
-  float invn = 1.0f / n;
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+#  else
+  /* Only jitter within the grid interval. */
+  float dx = cmj_randfloat(d, rng_hash);
+#  endif
+  fx = fx + dx * (1.0f / NUM_PMJ_SAMPLES);
+  fx = fx - floorf(fx);
 
-  s = cmj_permute(s, N, p * 0x51633e2d);
+#else
+#  warning "Not using Cranley-Patterson Rotation."
+#endif
 
-  int sdivm, smodm;
+  return fx;
+}
 
-  if (cmj_is_pow2(m)) {
-    sdivm = cmj_fast_div_pow2(s, m);
-    smodm = cmj_fast_mod_pow2(s, m);
-  }
-  else {
-    /* Doing `s * inmv` gives precision issues here. */
-    sdivm = s / m;
-    smodm = s - sdivm * m;
-  }
+ccl_device void pmj_sample_2D(
+    const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension, float *x, float *y)
+{
+  /* Perform a shuffle on the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
 
-  uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
-  uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
 
-  float jx = cmj_randfloat(s, p * 0x967a889b);
-  float jy = cmj_randfloat(s, p * 0x368cc8b7);
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
+  float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1);
 
-  *fx = (sx + (sy + jx) * invn) * invm;
-  *fy = (s + jy) * invN;
-}
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+  float dy = cmj_randfloat_simple(d + 1, rng_hash);
+#  else
+  float dx = cmj_randfloat(d, rng_hash);
+  float dy = cmj_randfloat(d + 1, rng_hash);
+#  endif
+  /* Only jitter within the grid cells. */
+  fx = fx + dx * (1.0f / NUM_PMJ_DIVISIONS);
+  fy = fy + dy * (1.0f / NUM_PMJ_DIVISIONS);
+  fx = fx - floorf(fx);
+  fy = fy - floorf(fy);
+#else
+#  warning "Not using Cranley Patterson Rotation."
 #endif
 
-ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
-{
-  /* Fallback to random */
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    return cmj_randfloat(sample, p);
-  }
-  else {
-    const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
-  }
-}
-
-ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
-{
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    const float fx = cmj_randfloat(sample, p);
-    const float fy = cmj_randfloat(sample, p + 1);
-    return make_float2(fx, fy);
-  }
-  else {
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
-    const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
-    const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
-                     1.0f;
-    return make_float2(fx, fy);
-  }
+  (*x) = fx;
+  (*y) = fy;
 }
 
 CCL_NAMESPACE_END