From 50df9caef01a4225db216d9c4c0515134f7a37bf Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen>
Date: Tue, 23 Aug 2022 20:48:48 +0200
Subject: Cycles: improve Progressive Multi-Jittered sampling

Fix two issues in the previous implementation:
* Only power-of-two prefixes were progressively stratified, not suffixes.
  This resulted in unnecessarily increased noise when using non-power-of-two
  sample counts.
* In order to try to get away with just a single sample pattern, the code
  used a combination of sample index shuffling and Cranley-Patterson rotation.
  Index shuffling is normally fine, but due to the sample patterns themselves
  not being quite right (as described above) this actually resulted in
  additional increased noise. Cranley-Patterson, on the other hand, always
  increases noise with randomized (t,s) nets like PMJ02, and should be avoided
  with these kinds of sequences.

Addressed with the following changes:
* Replace the sample pattern generation code with a much simpler algorithm
  recently published in the paper "Stochastic Generation of (t, s) Sample
  Sequences". This new implementation is easier to verify, produces fully
  progressively stratified PMJ02, and is *far* faster than the previous code,
  being O(N) in the number of samples generated.
* It keeps the sample index shuffling, which works correctly now due to the
  improved sample patterns. But it now uses a newer high-quality hash instead
  of the original Laine-Karras hash.
* The scrambling distance feature cannot (to my knowledge) be implemented with
  any decorrelation strategy other than Cranley-Patterson, so Cranley-Patterson
  is still used when that feature is enabled. But it is now disabled otherwise,
  since it increases noise.
* In place of Cranley-Patterson, multiple independent patterns are generated
  and randomly chosen for different pixels and dimensions as described in the
  original PMJ paper. In this patch, the pattern selection is done via
  hash-based shuffling to ensure there are no repeats within a single pixel
  until all patterns have been used.

The combination of these fixes brings the quality of Cycles' PMJ sampler in
line with the previously submitted Sobol-Burley sampler in D15679. They are
essentially indistinguishable in terms of quality/noise, which is expected
since they are both randomized (0,2) sequences.

Differential Revision: https://developer.blender.org/D15746
---
 intern/cycles/util/hash.h | 91 +++++++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 34 deletions(-)

(limited to 'intern/cycles/util')
diff --git a/intern/cycles/util/hash.h b/intern/cycles/util/hash.h
index 351b8796be7..4f83f331229 100644
--- a/intern/cycles/util/hash.h
+++ b/intern/cycles/util/hash.h
@@ -4,6 +4,7 @@
 #ifndef __UTIL_HASH_H__
 #define __UTIL_HASH_H__
 
+#include "util/math.h"
 #include "util/types.h"
 
 CCL_NAMESPACE_BEGIN
@@ -407,42 +408,16 @@ ccl_device_inline uint hash_hp_seeded_uint(uint i, uint seed)
   return hash_hp_uint(i ^ seed);
 }
 
-/* Outputs [0.0, 1.0]. */
-ccl_device_inline float hash_hp_seeded_float(uint i, uint seed)
+/* Outputs [0.0, 1.0). */
+ccl_device_inline float hash_hp_float(uint i)
 {
-  return uint_to_float_incl(hash_hp_seeded_uint(i, seed));
+  return uint_to_float_excl(hash_hp_uint(i));
 }
 
-/* ***** CMJ Hash Functions *****
- *
- * These are based on one of the hash functions in the paper
- * "Correlated Multi-Jittered Sampling" by Andrew Kensler, 2013.
- *
- * These are here for backwards-compatibility, and can be replaced
- * by the Hash Prospector hashes above at some point.
- * See https://developer.blender.org/D15679#426304
- */
-
-ccl_device_inline uint hash_cmj_seeded_uint(uint i, uint seed)
-{
-  i ^= seed;
-  i ^= i >> 17;
-  i ^= i >> 10;
-  i *= 0xb36534e5;
-  i ^= i >> 12;
-  i ^= i >> 21;
-  i *= 0x93fc4795;
-  i ^= 0xdf6e307f;
-  i ^= i >> 17;
-  i *= 1 | seed >> 18;
-
-  return i;
-}
-
-/* Outputs [0.0, 1.0]. */
-ccl_device_inline float hash_cmj_seeded_float(uint i, uint seed)
+/* Outputs [0.0, 1.0). */
+ccl_device_inline float hash_hp_seeded_float(uint i, uint seed)
 {
-  return uint_to_float_excl(hash_cmj_seeded_uint(i, seed));
+  return uint_to_float_excl(hash_hp_seeded_uint(i, seed));
 }
 
 /* ***** Modified Wang Hash Functions *****
@@ -463,10 +438,58 @@ ccl_device_inline uint hash_wang_seeded_uint(uint i, uint seed)
   return i;
 }
 
-/* Outputs [0.0, 1.0]. */
+/* Outputs [0.0, 1.0). */
 ccl_device_inline float hash_wang_seeded_float(uint i, uint seed)
 {
-  return uint_to_float_incl(hash_wang_seeded_uint(i, seed));
+  return uint_to_float_excl(hash_wang_seeded_uint(i, seed));
+}
+
+/* ***** Index Shuffling Hash Function *****
+ *
+ * This function takes an index, the length of the thing the index points
+ * into, and returns a shuffled index.  For example, if you pass indices
+ * 0 through 19 to this function with a length parameter of 20, it will
+ * return the indices in a shuffled order with no repeats.  Indices
+ * larger than the length parameter will simply repeat the same shuffled
+ * pattern over and over.
+ *
+ * This is useful for iterating over an array in random shuffled order
+ * without having to shuffle the array itself.
+ *
+ * Passing different seeds results in different random shuffles.
+ *
+ * This function runs in average O(1) time.
+ *
+ * See https://andrew-helmer.github.io/permute/ for details on how this
+ * works.
+ */
+ccl_device_inline uint hash_shuffle_uint(uint i, uint length, uint seed)
+{
+  i = i % length;
+  uint mask = (1 << (32 - count_leading_zeros(length - 1))) - 1;
+
+  do {
+    i ^= seed;
+    i *= 0xe170893d;
+    i ^= seed >> 16;
+    i ^= (i & mask) >> 4;
+    i ^= seed >> 8;
+    i *= 0x0929eb3f;
+    i ^= seed >> 23;
+    i ^= (i & mask) >> 1;
+    i *= 1 | seed >> 27;
+    i *= 0x6935fa69;
+    i ^= (i & mask) >> 11;
+    i *= 0x74dcb303;
+    i ^= (i & mask) >> 2;
+    i *= 0x9e501cc3;
+    i ^= (i & mask) >> 2;
+    i *= 0xc860a3df;
+    i &= mask;
+    i ^= i >> 5;
+  } while (i >= length);
+
+  return i;
 }
 
 /* ********** */
-- 
cgit v1.2.3