diff options
Diffstat (limited to 'intern/cycles/kernel/sample')
-rw-r--r-- | intern/cycles/kernel/sample/jitter.h | 231 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/pattern.h | 122 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/sobol_burley.h | 133 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/util.h | 35 |
4 files changed, 282 insertions, 239 deletions
diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h index b8da94248a4..e748f95fc7d 100644 --- a/intern/cycles/kernel/sample/jitter.h +++ b/intern/cycles/kernel/sample/jitter.h @@ -1,182 +1,97 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +#include "kernel/sample/util.h" +#include "util/hash.h" + #pragma once CCL_NAMESPACE_BEGIN -ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed) +ccl_device float pmj_sample_1D(KernelGlobals kg, + uint sample, + const uint rng_hash, + const uint dimension) { - x += seed; - x ^= (x * 0x6c50b47cu); - x ^= x * 0xb82f1e52u; - x ^= x * 0xc7afe638u; - x ^= x * 0x8d22f6e6u; + uint seed = rng_hash; - return x; -} + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + seed = kernel_data.integrator.seed; + } -ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) -{ - x = reverse_integer_bits(x); - x = laine_karras_permutation(x, seed); - x = reverse_integer_bits(x); + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + const uint sample_mask = NUM_PMJ_SAMPLES - 1; + const uint sample_shuffled = nested_uniform_scramble(sample, + hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. */ + const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % + (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + float x = kernel_data_fetch(sample_pattern_lut, index * 2); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + x += jitter_x; + x -= floorf(x); + } return x; } -ccl_device_inline uint cmj_hash(uint i, uint p) +ccl_device float2 pmj_sample_2D(KernelGlobals kg, + uint sample, + const uint rng_hash, + const uint dimension) { - i ^= p; - i ^= i >> 17; - i ^= i >> 10; - i *= 0xb36534e5; - i ^= i >> 12; - i ^= i >> 21; - i *= 0x93fc4795; - i ^= 0xdf6e307f; - i ^= i >> 17; - i *= 1 | p >> 18; - - return i; -} - -ccl_device_inline uint cmj_hash_simple(uint i, uint p) -{ - i = (i ^ 61) ^ p; - i += i << 3; - i ^= i >> 4; - i *= 0x27d4eb2d; - return i; -} - -ccl_device_inline float cmj_randfloat(uint i, uint p) -{ - return cmj_hash(i, p) * (1.0f / 4294967808.0f); -} - -ccl_device_inline float cmj_randfloat_simple(uint i, uint p) -{ - return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); -} + uint seed = rng_hash; -ccl_device_inline float cmj_randfloat_simple_dist(uint i, uint p, float d) -{ - return cmj_hash_simple(i, p) * (d / (float)0xFFFFFFFF); -} - -ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) -{ - uint hash = rng_hash; - float jitter_x = 0.0f; + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; - - jitter_x = cmj_randfloat_simple_dist( - dimension, rng_hash, kernel_data.integrator.scrambling_distance); + seed = kernel_data.integrator.seed; } - /* Perform Owen shuffle of the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = (dimension + sample_set); - const uint dim = d % NUM_PMJ_PATTERNS; - - /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D - * the x part is used for even dims and the y for odd. */ - int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); -# else - float dx = cmj_randfloat(d, hash); -# endif - /* Jitter sample locations and map back into [0 1]. */ - fx = fx + dx + jitter_x; - fx = fx - floorf(fx); -#else -# warning "Not using Cranley-Patterson Rotation." -#endif - - return fx; -} - -ccl_device void pmj_sample_2D(KernelGlobals kg, - uint sample, - uint rng_hash, - uint dimension, - ccl_private float *x, - ccl_private float *y) -{ - uint hash = rng_hash; - float jitter_x = 0.0f; - float jitter_y = 0.0f; + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + const uint sample_mask = NUM_PMJ_SAMPLES - 1; + const uint sample_shuffled = nested_uniform_scramble(sample, + hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. */ + const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % + (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + float x = kernel_data_fetch(sample_pattern_lut, index * 2); + float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; - - jitter_x = cmj_randfloat_simple_dist( - dimension, rng_hash, kernel_data.integrator.scrambling_distance); - jitter_y = cmj_randfloat_simple_dist( - dimension + 1, rng_hash, kernel_data.integrator.scrambling_distance); + const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + const float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) * + kernel_data.integrator.scrambling_distance; + x += jitter_x; + y += jitter_y; + x -= floorf(x); + y -= floorf(y); } - /* Perform a shuffle on the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = dimension + sample_set; - uint dim = d % NUM_PMJ_PATTERNS; - int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); - float dy = cmj_randfloat_simple(d + 1, hash); -# else - float dx = cmj_randfloat(d, hash); - float dy = cmj_randfloat(d + 1, hash); -# endif - /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ - float sx = fx + dx + jitter_x; - float sy = fy + dy + jitter_y; - sx = sx - floorf(sx); - sy = sy - floorf(sy); -#else -# warning "Not using Cranley Patterson Rotation." -#endif - - (*x) = sx; - (*y) = sy; + return make_float2(x, y); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h index 1e66f39ede2..ebdecc1bff9 100644 --- a/intern/cycles/kernel/sample/pattern.h +++ b/intern/cycles/kernel/sample/pattern.h @@ -4,6 +4,7 @@ #pragma once #include "kernel/sample/jitter.h" +#include "kernel/sample/sobol_burley.h" #include "util/hash.h" CCL_NAMESPACE_BEGIN @@ -12,33 +13,6 @@ CCL_NAMESPACE_BEGIN * this single threaded on a CPU for repeatable results. */ //#define __DEBUG_CORRELATION__ -/* High Dimensional Sobol. - * - * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal - * to classic Van der Corput and Sobol sequences. */ - -#ifdef __SOBOL__ - -/* Skip initial numbers that for some dimensions have clear patterns that - * don't cover the entire sample space. Ideally we would have a better - * progressive pattern that doesn't suffer from this problem, because even - * with this offset some dimensions are quite poor. - */ -# define SOBOL_SKIP 64 - -ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension) -{ - uint result = 0; - uint i = index + SOBOL_SKIP; - for (int j = 0, x; (x = find_first_set(i)); i >>= x) { - j += x; - result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1)); - } - return result; -} - -#endif /* __SOBOL__ */ - ccl_device_forceinline float path_rng_1D(KernelGlobals kg, uint rng_hash, int sample, @@ -48,58 +22,29 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg, return (float)drand48(); #endif -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) -#endif - { + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + return sobol_burley_sample_1D(sample, dimension, rng_hash); + } + else { return pmj_sample_1D(kg, sample, rng_hash, dimension); } - -#ifdef __SOBOL__ - /* Sobol sequence value using direction vectors. */ - uint result = sobol_dimension(kg, sample, dimension); - float r = (float)result * (1.0f / (float)0xFFFFFFFF); - - /* Cranly-Patterson rotation using rng seed */ - float shift; - - /* Hash rng with dimension to solve correlation issues. - * See T38710, T50116. - */ - uint tmp_rng = cmj_hash_simple(dimension, rng_hash); - shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF); - - return r + shift - floorf(r + shift); -#endif } -ccl_device_forceinline void path_rng_2D(KernelGlobals kg, - uint rng_hash, - int sample, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg, + uint rng_hash, + int sample, + int dimension) { #ifdef __DEBUG_CORRELATION__ - *fx = (float)drand48(); - *fy = (float)drand48(); - return; -#endif - -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) + return make_float2((float)drand48(), (float)drand48()); #endif - { - pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy); - return; + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + return sobol_burley_sample_2D(sample, dimension, rng_hash); + } + else { + return pmj_sample_2D(kg, sample, rng_hash, dimension); } - -#ifdef __SOBOL__ - /* Sobol. */ - *fx = path_rng_1D(kg, rng_hash, sample, dimension); - *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1); -#endif } /** @@ -145,18 +90,33 @@ ccl_device_inline uint path_rng_hash_init(KernelGlobals kg, return rng_hash; } -ccl_device_inline bool sample_is_even(int pattern, int sample) +/** + * Splits samples into two different classes, A and B, which can be + * compared for variance estimation. + */ +ccl_device_inline bool sample_is_class_A(int pattern, int sample) { - if (pattern == SAMPLING_PATTERN_PMJ) { - /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al. - * We can use this to get divide sample sequence into two classes for easier variance - * estimation. */ - return popcount(uint(sample) & 0xaaaaaaaa) & 1; - } - else { - /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */ - return sample & 0x1; +#if 0 + if (!(pattern == SAMPLING_PATTERN_PMJ || pattern == SAMPLING_PATTERN_SOBOL_BURLEY)) { + /* Fallback: assign samples randomly. + * This is guaranteed to work "okay" for any sampler, but isn't good. + * (Note: the seed constant is just a random number to guard against + * possible interactions with other uses of the hash. There's nothing + * special about it.) + */ + return hash_hp_seeded_uint(sample, 0xa771f873) & 1; } -} +#else + (void)pattern; +#endif + /* This follows the approach from section 10.2.1 of "Progressive + * Multi-Jittered Sample Sequences" by Christensen et al., but + * implemented with efficient bit-fiddling. + * + * This approach also turns out to work equally well with Sobol-Burley + * (see https://developer.blender.org/D15746#429471). + */ + return popcount(uint(sample) & 0xaaaaaaaa) & 1; +} CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/sobol_burley.h b/intern/cycles/kernel/sample/sobol_burley.h new file mode 100644 index 00000000000..47796ae7998 --- /dev/null +++ b/intern/cycles/kernel/sample/sobol_burley.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* + * A shuffled, Owen-scrambled Sobol sampler, implemented with the + * techniques from the paper "Practical Hash-based Owen Scrambling" + * by Brent Burley, 2020, Journal of Computer Graphics Techniques. + * + * Note that unlike a standard high-dimensional Sobol sequence, this + * Sobol sampler uses padding to achieve higher dimensions, as described + * in Burley's paper. + */ + +#pragma once + +#include "kernel/sample/util.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Computes a single dimension of a sample from an Owen-scrambled + * Sobol sequence. This is used in the main sampling functions, + * sobol_burley_sample_#D(), below. + * + * - rev_bit_index: the sample index, with reversed order bits. + * - dimension: the sample dimension. + * - scramble_seed: the Owen scrambling seed. + * + * Note that the seed must be well randomized before being + * passed to this function. + */ +ccl_device_forceinline float sobol_burley(uint rev_bit_index, + const uint dimension, + const uint scramble_seed) +{ + uint result = 0; + + if (dimension == 0) { + /* Fast-path for dimension 0, which is just Van der corput. + * This makes a notable difference in performance since we reuse + * dimensions for padding, and dimension 0 is reused the most. */ + result = reverse_integer_bits(rev_bit_index); + } + else { + uint i = 0; + while (rev_bit_index != 0) { + uint j = count_leading_zeros(rev_bit_index); + result ^= sobol_burley_table[dimension][i + j]; + i += j + 1; + + /* We can't do "<<= j + 1" because that can overflow the shift + * operator, which doesn't do what we need on at least x86. */ + rev_bit_index <<= j; + rev_bit_index <<= 1; + } + } + + /* Apply Owen scrambling. */ + result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed)); + + return uint_to_float_excl(result); +} + +/* + * Computes a 1D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed) +{ + /* Include the dimension in the seed, so we get decorrelated + * sequences for different dimensions via shuffling. */ + seed ^= hash_hp_uint(dimension); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe); + + return sobol_burley(index, 0, seed ^ 0x635c77bd); +} + +/* + * Computes a 2D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a); + + return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76), + sobol_burley(index, 1, seed ^ 0x94964d4e)); +} + +/* + * Computes a 3D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac); + + return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391), + sobol_burley(index, 1, seed ^ 0x67c33241), + sobol_burley(index, 2, seed ^ 0x78c395c5)); +} + +/* + * Computes a 4D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055); + + return make_float4(sobol_burley(index, 0, seed ^ 0x39468210), + sobol_burley(index, 1, seed ^ 0xe9d8a845), + sobol_burley(index, 2, seed ^ 0x5f32b482), + sobol_burley(index, 3, seed ^ 0x1524cc56)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/util.h b/intern/cycles/kernel/sample/util.h new file mode 100644 index 00000000000..29cda179aa2 --- /dev/null +++ b/intern/cycles/kernel/sample/util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Performs base-2 Owen scrambling on a reversed-bit unsigned integer. + * + * This is equivalent to the Laine-Karras permutation, but much higher + * quality. See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash + */ +ccl_device_inline uint reversed_bit_owen(uint n, uint seed) +{ + n ^= n * 0x3d20adea; + n += seed; + n *= (seed >> 16) | 1; + n ^= n * 0x05526c56; + n ^= n * 0x53a22864; + + return n; +} + +/* + * Performs base-2 Owen scrambling on an unsigned integer. + */ +ccl_device_inline uint nested_uniform_scramble(uint i, uint seed) +{ + return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed)); +} + +CCL_NAMESPACE_END |