diff options
author | Nathan Vegdahl <cessen> | 2022-08-23 21:48:48 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-09-01 15:57:39 +0300 |
commit | 50df9caef01a4225db216d9c4c0515134f7a37bf (patch) | |
tree | e6632fc669d7d5a9d084b2ad33764810b286e156 /intern/cycles/kernel | |
parent | ba1bf87bd8f13fa2c67c435eb4a31a0c898d65ac (diff) |
Cycles: improve Progressive Multi-Jittered sampling
Fix two issues in the previous implementation:
* Only power-of-two prefixes were progressively stratified, not suffixes.
This resulted in unnecessarily increased noise when using non-power-of-two
sample counts.
* In order to try to get away with just a single sample pattern, the code
used a combination of sample index shuffling and Cranley-Patterson rotation.
Index shuffling is normally fine, but because the sample patterns themselves
were not quite right (as described above), it actually increased noise
even further. Cranley-Patterson rotation, on the other hand, always
increases noise with randomized (t,s) nets like PMJ02, and should be avoided
with these kinds of sequences.
Addressed with the following changes:
* Replace the sample pattern generation code with a much simpler algorithm
recently published in the paper "Stochastic Generation of (t, s) Sample
Sequences". This new implementation is easier to verify, produces fully
progressively stratified PMJ02, and is *far* faster than the previous code,
being O(N) in the number of samples generated.
* Keep the sample index shuffling, which now works correctly thanks to the
improved sample patterns, but use a newer high-quality hash instead
of the original Laine-Karras hash.
* The scrambling distance feature cannot (to my knowledge) be implemented with
any decorrelation strategy other than Cranley-Patterson, so Cranley-Patterson
is still used when that feature is enabled. But it is now disabled otherwise,
since it increases noise.
* In place of Cranley-Patterson, multiple independent patterns are generated
and randomly chosen for different pixels and dimensions as described in the
original PMJ paper. In this patch, the pattern selection is done via
hash-based shuffling to ensure there are no repeats within a single pixel
until all patterns have been used.
The combination of these fixes brings the quality of Cycles' PMJ sampler in
line with the previously submitted Sobol-Burley sampler in D15679. They are
essentially indistinguishable in terms of quality/noise, which is expected
since they are both randomized (0,2) sequences.
Differential Revision: https://developer.blender.org/D15746
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/integrator/subsurface_random_walk.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/jitter.h | 147 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/util.h | 18 | ||||
-rw-r--r-- | intern/cycles/kernel/types.h | 8 |
4 files changed, 71 insertions, 104 deletions
diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index baca0d745e8..e0c69c96fc6 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -229,7 +229,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = hash_cmj_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); /* Random walk until we hit the surface again. */ bool hit = false; diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h index dd170cf2120..6a9ff1beec5 100644 --- a/intern/cycles/kernel/sample/jitter.h +++ b/intern/cycles/kernel/sample/jitter.h @@ -7,57 +7,40 @@ #pragma once CCL_NAMESPACE_BEGIN -ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) -{ - x = reverse_integer_bits(x); - x = laine_karras_permutation(x, seed); - x = reverse_integer_bits(x); - - return x; -} - ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) { - uint hash = rng_hash; - float jitter_x = 0.0f; - if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; + uint seed = rng_hash; - jitter_x = hash_wang_seeded_float(dimension, rng_hash) * - kernel_data.integrator.scrambling_distance; + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + seed = kernel_data.integrator.seed; } - /* Perform Owen shuffle of the sample number to reorder the samples. 
*/ - const uint rv = hash_cmj_seeded_uint(dimension, hash); -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = (dimension + sample_set); - const uint dim = d % NUM_PMJ_PATTERNS; - - /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D - * the x part is used for even dims and the y for odd. */ - int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1); - - float fx = kernel_data_fetch(sample_pattern_lut, index); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ - float dx = hash_cmj_seeded_float(d, hash); - /* Jitter sample locations and map back into [0 1]. */ - fx = fx + dx + jitter_x; - fx = fx - floorf(fx); -#else -# warning "Not using Cranley-Patterson Rotation." -#endif + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + uint sample_mask = NUM_PMJ_SAMPLES - 1; + uint sample_shuffled = nested_uniform_scramble(sample, hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. 
*/ + uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + float x = kernel_data_fetch(sample_pattern_lut, index * 2); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + x += jitter_x; + x -= floorf(x); + } - return fx; + return x; } ccl_device void pmj_sample_2D(KernelGlobals kg, @@ -67,51 +50,41 @@ ccl_device void pmj_sample_2D(KernelGlobals kg, ccl_private float *x, ccl_private float *y) { - uint hash = rng_hash; - float jitter_x = 0.0f; - float jitter_y = 0.0f; - if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; + uint seed = rng_hash; - jitter_x = hash_wang_seeded_float(dimension, rng_hash) * - kernel_data.integrator.scrambling_distance; - jitter_y = hash_wang_seeded_float(dimension + 1, rng_hash) * - kernel_data.integrator.scrambling_distance; + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + seed = kernel_data.integrator.seed; } - /* Perform a shuffle on the sample number to reorder the samples. */ - const uint rv = hash_cmj_seeded_uint(dimension, hash); -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. 
*/ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = dimension + sample_set; - uint dim = d % NUM_PMJ_PATTERNS; - int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)); - - float fx = kernel_data_fetch(sample_pattern_lut, index); - float fy = kernel_data_fetch(sample_pattern_lut, index + 1); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ - float dx = hash_cmj_seeded_float(d, hash); - float dy = hash_cmj_seeded_float(d + 1, hash); - /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ - float sx = fx + dx + jitter_x; - float sy = fy + dy + jitter_y; - sx = sx - floorf(sx); - sy = sy - floorf(sy); -#else -# warning "Not using Cranley Patterson Rotation." -#endif - - (*x) = sx; - (*y) = sy; + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + uint sample_mask = NUM_PMJ_SAMPLES - 1; + uint sample_shuffled = nested_uniform_scramble(sample, hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. */ + uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + (*x) = kernel_data_fetch(sample_pattern_lut, index * 2); + (*y) = kernel_data_fetch(sample_pattern_lut, index * 2 + 1); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. 
*/ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) * + kernel_data.integrator.scrambling_distance; + (*x) += jitter_x; + (*y) += jitter_y; + (*x) -= floorf(*x); + (*y) -= floorf(*y); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/util.h b/intern/cycles/kernel/sample/util.h index 33056bb7819..29cda179aa2 100644 --- a/intern/cycles/kernel/sample/util.h +++ b/intern/cycles/kernel/sample/util.h @@ -8,7 +8,7 @@ CCL_NAMESPACE_BEGIN /* - * Performs base-2 Owen scrambling on a reversed-bit integer. + * Performs base-2 Owen scrambling on a reversed-bit unsigned integer. * * This is equivalent to the Laine-Karras permutation, but much higher * quality. See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash @@ -25,21 +25,11 @@ ccl_device_inline uint reversed_bit_owen(uint n, uint seed) } /* - * Performs base-2 Owen scrambling on a reversed-bit integer. - * - * This is here for backwards-compatibility, and can be replaced - * with reversed_bit_owen() above at some point. - * See https://developer.blender.org/D15679#426304 + * Performs base-2 Owen scrambling on an unsigned integer. 
*/ -ccl_device_inline uint laine_karras_permutation(uint x, uint seed) +ccl_device_inline uint nested_uniform_scramble(uint i, uint seed) { - x += seed; - x ^= (x * 0x6c50b47cu); - x ^= x * 0xb82f1e52u; - x ^= x * 0xc7afe638u; - x ^= x * 0x8d22f6e6u; - - return x; + return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed)); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index f55ace1a227..655c9c5503b 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1364,10 +1364,14 @@ typedef struct KernelShaderEvalInput { } KernelShaderEvalInput; static_assert_align(KernelShaderEvalInput, 16); -/* Pre-computed sample table sizes for PMJ02 sampler. */ +/* Pre-computed sample table sizes for PMJ02 sampler. + * + * Note: divisions *must* be a power of two, and patterns + * ideally should be as well. + */ #define NUM_PMJ_DIVISIONS 32 #define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS)) -#define NUM_PMJ_PATTERNS 1 +#define NUM_PMJ_PATTERNS 64 /* Device kernels. * |