diff options
-rw-r--r-- | intern/cycles/blender/addon/properties.py | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/path_state.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/subsurface_random_walk.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/jitter.h | 93 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/pattern.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/sobol_burley.h | 143 | ||||
-rw-r--r-- | intern/cycles/kernel/sample/util.h | 45 | ||||
-rw-r--r-- | intern/cycles/kernel/tables.h | 53 | ||||
-rw-r--r-- | intern/cycles/kernel/types.h | 1 | ||||
-rw-r--r-- | intern/cycles/scene/integrator.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/util/hash.h | 120 |
12 files changed, 393 insertions, 88 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 2c926893f9d..859560c8062 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -83,6 +83,7 @@ enum_use_layer_samples = ( enum_sampling_pattern = ( ('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0), ('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1), + ('SOBOL_BURLEY', "Sobol-Burley", "Use Sobol-Burley random sampling pattern", 2), ) enum_volume_sampling = ( diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index fbc30234dac..c7dcc928c0d 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -274,6 +274,8 @@ set(SRC_KERNEL_SAMPLE_HEADERS sample/mapping.h sample/mis.h sample/pattern.h + sample/sobol_burley.h + sample/util.h ) set(SRC_KERNEL_UTIL_HEADERS diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index 5ec94b934ca..a41e922b593 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -321,8 +321,10 @@ ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, /* Use a hash instead of dimension, this is not great but avoids adding * more dimensions to each bounce which reduces quality of dimensions we * are already using. 
*/ - return path_rng_1D( - kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); + return path_rng_1D(kg, + hash_wang_seeded_uint(rng_state->rng_hash, hash), + rng_state->sample, + rng_state->rng_offset); } ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index 9c67d909bd4..baca0d745e8 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -229,7 +229,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + rng_state.rng_hash = hash_cmj_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); /* Random walk until we hit the surface again. 
*/ bool hit = false; diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h index b5cfa624406..dd170cf2120 100644 --- a/intern/cycles/kernel/sample/jitter.h +++ b/intern/cycles/kernel/sample/jitter.h @@ -1,20 +1,12 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +#include "kernel/sample/util.h" +#include "util/hash.h" + #pragma once CCL_NAMESPACE_BEGIN -ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed) -{ - x += seed; - x ^= (x * 0x6c50b47cu); - x ^= x * 0xb82f1e52u; - x ^= x * 0xc7afe638u; - x ^= x * 0x8d22f6e6u; - - return x; -} - ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) { x = reverse_integer_bits(x); @@ -24,46 +16,6 @@ ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) return x; } -ccl_device_inline uint cmj_hash(uint i, uint p) -{ - i ^= p; - i ^= i >> 17; - i ^= i >> 10; - i *= 0xb36534e5; - i ^= i >> 12; - i ^= i >> 21; - i *= 0x93fc4795; - i ^= 0xdf6e307f; - i ^= i >> 17; - i *= 1 | p >> 18; - - return i; -} - -ccl_device_inline uint cmj_hash_simple(uint i, uint p) -{ - i = (i ^ 61) ^ p; - i += i << 3; - i ^= i >> 4; - i *= 0x27d4eb2d; - return i; -} - -ccl_device_inline float cmj_randfloat(uint i, uint p) -{ - return cmj_hash(i, p) * (1.0f / 4294967808.0f); -} - -ccl_device_inline float cmj_randfloat_simple(uint i, uint p) -{ - return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); -} - -ccl_device_inline float cmj_randfloat_simple_dist(uint i, uint p, float d) -{ - return cmj_hash_simple(i, p) * (d / (float)0xFFFFFFFF); -} - ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) { uint hash = rng_hash; @@ -71,16 +23,12 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin if (kernel_data.integrator.scrambling_distance < 1.0f) { hash = kernel_data.integrator.seed; - jitter_x = cmj_randfloat_simple_dist( - dimension, 
rng_hash, kernel_data.integrator.scrambling_distance); + jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; } /* Perform Owen shuffle of the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif + const uint rv = hash_cmj_seeded_uint(dimension, hash); #ifdef _XOR_SHUFFLE_ # warning "Using XOR shuffle." const uint s = sample ^ rv; @@ -101,11 +49,7 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin #ifndef _NO_CRANLEY_PATTERSON_ROTATION_ /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); -# else - float dx = cmj_randfloat(d, hash); -# endif + float dx = hash_cmj_seeded_float(d, hash); /* Jitter sample locations and map back into [0 1]. */ fx = fx + dx + jitter_x; fx = fx - floorf(fx); @@ -129,18 +73,14 @@ ccl_device void pmj_sample_2D(KernelGlobals kg, if (kernel_data.integrator.scrambling_distance < 1.0f) { hash = kernel_data.integrator.seed; - jitter_x = cmj_randfloat_simple_dist( - dimension, rng_hash, kernel_data.integrator.scrambling_distance); - jitter_y = cmj_randfloat_simple_dist( - dimension + 1, rng_hash, kernel_data.integrator.scrambling_distance); + jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + jitter_y = hash_wang_seeded_float(dimension + 1, rng_hash) * + kernel_data.integrator.scrambling_distance; } /* Perform a shuffle on the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif + const uint rv = hash_cmj_seeded_uint(dimension, hash); #ifdef _XOR_SHUFFLE_ # warning "Using XOR shuffle." 
const uint s = sample ^ rv; @@ -159,13 +99,8 @@ ccl_device void pmj_sample_2D(KernelGlobals kg, #ifndef _NO_CRANLEY_PATTERSON_ROTATION_ /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); - float dy = cmj_randfloat_simple(d + 1, hash); -# else - float dx = cmj_randfloat(d, hash); - float dy = cmj_randfloat(d + 1, hash); -# endif + float dx = hash_cmj_seeded_float(d, hash); + float dy = hash_cmj_seeded_float(d + 1, hash); /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ float sx = fx + dx + jitter_x; float sy = fy + dy + jitter_y; diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h index 89500d51872..e8c3acb5cf7 100644 --- a/intern/cycles/kernel/sample/pattern.h +++ b/intern/cycles/kernel/sample/pattern.h @@ -4,6 +4,7 @@ #pragma once #include "kernel/sample/jitter.h" +#include "kernel/sample/sobol_burley.h" #include "util/hash.h" CCL_NAMESPACE_BEGIN @@ -48,6 +49,10 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg, return (float)drand48(); #endif + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + return sobol_burley_sample_1D(sample, dimension, rng_hash); + } + #ifdef __SOBOL__ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) #endif @@ -66,7 +71,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg, /* Hash rng with dimension to solve correlation issues. * See T38710, T50116. 
*/ - uint tmp_rng = cmj_hash_simple(dimension, rng_hash); + uint tmp_rng = hash_wang_seeded_uint(dimension, rng_hash); shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF); return r + shift - floorf(r + shift); @@ -86,6 +91,11 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals kg, return; #endif + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + sobol_burley_sample_2D(sample, dimension, rng_hash, fx, fy); + return; + } + #ifdef __SOBOL__ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) #endif diff --git a/intern/cycles/kernel/sample/sobol_burley.h b/intern/cycles/kernel/sample/sobol_burley.h new file mode 100644 index 00000000000..4e041aa075e --- /dev/null +++ b/intern/cycles/kernel/sample/sobol_burley.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* + * A shuffled, Owen-scrambled Sobol sampler, implemented with the + * techniques from the paper "Practical Hash-based Owen Scrambling" + * by Brent Burley, 2020, Journal of Computer Graphics Techniques. + * + * Note that unlike a standard high-dimensional Sobol sequence, this + * Sobol sampler uses padding to achieve higher dimensions, as described + * in Burley's paper. + */ + +#pragma once + +#include "kernel/sample/util.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Computes a single dimension of a sample from an Owen-scrambled + * Sobol sequence. This is used in the main sampling functions, + * sobol_burley_sample_#D(), below. + * + * - rev_bit_index: the sample index, with reversed order bits. + * - dimension: the sample dimension. + * - scramble_seed: the Owen scrambling seed. + * + * Note that the seed must be well randomized before being + * passed to this function. 
+ */ +ccl_device_forceinline float sobol_burley(uint rev_bit_index, uint dimension, uint scramble_seed) +{ + uint result = 0; + + if (dimension == 0) { + // Fast-path for dimension 0, which is just van der Corput. + // This makes a notable difference in performance since we reuse + // dimensions for padding, and dimension 0 is reused the most. + result = reverse_integer_bits(rev_bit_index); + } + else { + uint i = 0; + while (rev_bit_index != 0) { + uint j = count_leading_zeros(rev_bit_index); + result ^= sobol_burley_table[dimension][i + j]; + i += j + 1; + + // We can't do "<<= j + 1" because that can overflow the shift + // operator, which doesn't do what we need on at least x86. + rev_bit_index <<= j; + rev_bit_index <<= 1; + } + } + + // Apply Owen scrambling. + result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed)); + + return uint_to_float_excl(result); +} + +/* + * Computes a 1D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float sobol_burley_sample_1D(uint index, uint dimension, uint seed) +{ + // Include the dimension in the seed, so we get decorrelated + // sequences for different dimensions via shuffling. + seed ^= hash_hp_uint(dimension); + + // Shuffle. + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe); + + return sobol_burley(index, 0, seed ^ 0x635c77bd); +} + +/* + * Computes a 2D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device void sobol_burley_sample_2D( + uint index, uint dimension_set, uint seed, ccl_private float *x, ccl_private float *y) +{ + // Include the dimension set in the seed, so we get decorrelated + // sequences for different dimension sets via shuffling. + seed ^= hash_hp_uint(dimension_set); + + // Shuffle. + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a); + + *x = sobol_burley(index, 0, seed ^ 0xe0aaaf76); + *y = sobol_burley(index, 1, seed ^ 0x94964d4e); +} + +/* + * Computes a 3D Owen-scrambled and shuffled Sobol sample. 
+ */ +ccl_device void sobol_burley_sample_3D(uint index, + uint dimension_set, + uint seed, + ccl_private float *x, + ccl_private float *y, + ccl_private float *z) +{ + // Include the dimension set in the seed, so we get decorrelated + // sequences for different dimension sets via shuffling. + seed ^= hash_hp_uint(dimension_set); + + // Shuffle. + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac); + + *x = sobol_burley(index, 0, seed ^ 0x9e78e391); + *y = sobol_burley(index, 1, seed ^ 0x67c33241); + *z = sobol_burley(index, 2, seed ^ 0x78c395c5); +} + +/* + * Computes a 4D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device void sobol_burley_sample_4D(uint index, + uint dimension_set, + uint seed, + ccl_private float *x, + ccl_private float *y, + ccl_private float *z, + ccl_private float *w) +{ + // Include the dimension set in the seed, so we get decorrelated + // sequences for different dimension sets via shuffling. + seed ^= hash_hp_uint(dimension_set); + + // Shuffle. + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055); + + *x = sobol_burley(index, 0, seed ^ 0x39468210); + *y = sobol_burley(index, 1, seed ^ 0xe9d8a845); + *z = sobol_burley(index, 2, seed ^ 0x5f32b482); + *w = sobol_burley(index, 3, seed ^ 0x1524cc56); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/util.h b/intern/cycles/kernel/sample/util.h new file mode 100644 index 00000000000..33056bb7819 --- /dev/null +++ b/intern/cycles/kernel/sample/util.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Performs base-2 Owen scrambling on a reversed-bit integer. + * + * This is equivalent to the Laine-Karras permutation, but much higher + * quality. 
See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash + */ +ccl_device_inline uint reversed_bit_owen(uint n, uint seed) +{ + n ^= n * 0x3d20adea; + n += seed; + n *= (seed >> 16) | 1; + n ^= n * 0x05526c56; + n ^= n * 0x53a22864; + + return n; +} + +/* + * Performs base-2 Owen scrambling on a reversed-bit integer. + * + * This is here for backwards-compatibility, and can be replaced + * with reversed_bit_owen() above at some point. + * See https://developer.blender.org/D15679#426304 + */ +ccl_device_inline uint laine_karras_permutation(uint x, uint seed) +{ + x += seed; + x ^= (x * 0x6c50b47cu); + x ^= x * 0xb82f1e52u; + x ^= x * 0xc7afe638u; + x ^= x * 0x8d22f6e6u; + + return x; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/tables.h b/intern/cycles/kernel/tables.h index c1fdbba3fa7..399eea1e2b1 100644 --- a/intern/cycles/kernel/tables.h +++ b/intern/cycles/kernel/tables.h @@ -63,4 +63,57 @@ ccl_inline_constant float cie_colour_match[][3] = { {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f} }; +/* + * The direction vectors for the first four dimensions of the Sobol + * sequence, stored with reversed-order bits. + * + * This is used in the Sobol-Burley sampler implementation. We don't + * need more than four dimensions because we achieve higher dimensions + * with padding. They're stored with reversed bits because we need + * them reversed for the fast hash-based Owen scrambling anyway, and + * this avoids doing that at run time. 
+ */ +ccl_inline_constant unsigned int sobol_burley_table[4][32] = { + { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080, + 0x00000100, 0x00000200, 0x00000400, 0x00000800, + 0x00001000, 0x00002000, 0x00004000, 0x00008000, + 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + }, + { + 0x00000001, 0x00000003, 0x00000005, 0x0000000f, + 0x00000011, 0x00000033, 0x00000055, 0x000000ff, + 0x00000101, 0x00000303, 0x00000505, 0x00000f0f, + 0x00001111, 0x00003333, 0x00005555, 0x0000ffff, + 0x00010001, 0x00030003, 0x00050005, 0x000f000f, + 0x00110011, 0x00330033, 0x00550055, 0x00ff00ff, + 0x01010101, 0x03030303, 0x05050505, 0x0f0f0f0f, + 0x11111111, 0x33333333, 0x55555555, 0xffffffff, + }, + { + 0x00000001, 0x00000003, 0x00000006, 0x00000009, + 0x00000017, 0x0000003a, 0x00000071, 0x000000a3, + 0x00000116, 0x00000339, 0x00000677, 0x000009aa, + 0x00001601, 0x00003903, 0x00007706, 0x0000aa09, + 0x00010117, 0x0003033a, 0x00060671, 0x000909a3, + 0x00171616, 0x003a3939, 0x00717777, 0x00a3aaaa, + 0x01170001, 0x033a0003, 0x06710006, 0x09a30009, + 0x16160017, 0x3939003a, 0x77770071, 0xaaaa00a3, + }, + { + 0x00000001, 0x00000003, 0x00000004, 0x0000000a, + 0x0000001f, 0x0000002e, 0x00000045, 0x000000c9, + 0x0000011b, 0x000002a4, 0x0000079a, 0x00000b67, + 0x0000101e, 0x0000302d, 0x00004041, 0x0000a0c3, + 0x0001f104, 0x0002e28a, 0x000457df, 0x000c9bae, + 0x0011a105, 0x002a7289, 0x0079e7db, 0x00b6dba4, + 0x0100011a, 0x030002a7, 0x0400079e, 0x0a000b6d, + 0x1f001001, 0x2e003003, 0x45004004, 0xc900a00a, + }, +}; + /* clang-format on */ diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 59ea6c64be7..f55ace1a227 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -178,6 +178,7 @@ enum PathTraceDimension { enum SamplingPattern { 
SAMPLING_PATTERN_SOBOL = 0, SAMPLING_PATTERN_PMJ = 1, + SAMPLING_PATTERN_SOBOL_BURLEY = 2, SAMPLING_NUM_PATTERNS, }; diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp index aa11004fb48..58daf417ab0 100644 --- a/intern/cycles/scene/integrator.cpp +++ b/intern/cycles/scene/integrator.cpp @@ -89,6 +89,7 @@ NODE_DEFINE(Integrator) static NodeEnum sampling_pattern_enum; sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); + sampling_pattern_enum.insert("sobol_burley", SAMPLING_PATTERN_SOBOL_BURLEY); SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); SOCKET_FLOAT(scrambling_distance, "Scrambling Distance", 1.0f); @@ -260,7 +261,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene dscene->sample_pattern_lut.copy_to_device(); } - else { + else if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ) { constexpr int sequence_size = NUM_PMJ_SAMPLES; constexpr int num_sequences = NUM_PMJ_PATTERNS; float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * diff --git a/intern/cycles/util/hash.h b/intern/cycles/util/hash.h index 081b33025d8..61705276a90 100644 --- a/intern/cycles/util/hash.h +++ b/intern/cycles/util/hash.h @@ -8,6 +8,23 @@ CCL_NAMESPACE_BEGIN +/* [0, uint_max] -> [0.0, 1.0) */ +ccl_device_forceinline float uint_to_float_excl(uint n) +{ + // Note: we divide by 4294967808 instead of 2^32 because the latter + // leads to a [0.0, 1.0] mapping instead of [0.0, 1.0) due to floating + // point rounding error. 4294967808 unfortunately leaves (precisely) + // one unused ulp between the max number this outputs and 1.0, but + // that's the best you can do with this construction. 
+ return (float)n * (1.0f / 4294967808.0f); +} + +/* [0, uint_max] -> [0.0, 1.0] */ +ccl_device_forceinline float uint_to_float_incl(uint n) +{ + return (float)n * (1.0f / (float)0xFFFFFFFFu); +} + /* ***** Jenkins Lookup3 Hash Functions ***** */ /* Source: http://burtleburtle.net/bob/c/lookup3.c */ @@ -116,22 +133,22 @@ ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) ccl_device_inline float hash_uint_to_float(uint kx) { - return (float)hash_uint(kx) / (float)0xFFFFFFFFu; + return uint_to_float_incl(hash_uint(kx)); } ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) { - return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; + return uint_to_float_incl(hash_uint2(kx, ky)); } ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) { - return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; + return uint_to_float_incl(hash_uint3(kx, ky, kz)); } ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) { - return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; + return uint_to_float_incl(hash_uint4(kx, ky, kz, kw)); } /* Hashing float or float[234] into a float in the range [0, 1]. */ @@ -359,6 +376,101 @@ ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw) #endif +/* ***** Hash Prospector Hash Functions ***** + * + * These are based on the high-quality 32-bit hash/mixing functions from + * https://github.com/skeeto/hash-prospector + */ + +ccl_device_inline uint hash_hp_uint(uint i) +{ + // The actual mixing function from Hash Prospector. + i ^= i >> 16; + i *= 0x21f0aaad; + i ^= i >> 15; + i *= 0xd35a2d97; + i ^= i >> 15; + + // The xor is just to make input zero not map to output zero. + // The number is randomly selected and isn't special. + return i ^ 0xe6fe3beb; +} + +/* Seedable version of hash_hp_uint() above. 
*/ +ccl_device_inline uint hash_hp_seeded_uint(uint i, uint seed) +{ + // Manipulate the seed so it doesn't interact poorly with i when they + // are both e.g. incrementing. This isn't fool-proof, but is good + // enough for practical use. + seed ^= seed << 19; + + return hash_hp_uint(i ^ seed); +} + +/* Outputs [0.0, 1.0]. */ +ccl_device_inline float hash_hp_seeded_float(uint i, uint seed) +{ + return uint_to_float_incl(hash_hp_seeded_uint(i, seed)); +} + +/* ***** CMJ Hash Functions ***** + * + * These are based on one of the hash functions in the paper + * "Correlated Multi-Jittered Sampling" by Andrew Kensler, 2013. + * + * These are here for backwards-compatibility, and can be replaced + * by the Hash Prospector hashes above at some point. + * See https://developer.blender.org/D15679#426304 + */ + +ccl_device_inline uint hash_cmj_seeded_uint(uint i, uint seed) +{ + i ^= seed; + i ^= i >> 17; + i ^= i >> 10; + i *= 0xb36534e5; + i ^= i >> 12; + i ^= i >> 21; + i *= 0x93fc4795; + i ^= 0xdf6e307f; + i ^= i >> 17; + i *= 1 | seed >> 18; + + return i; +} + +/* Outputs [0.0, 1.0). */ +ccl_device_inline float hash_cmj_seeded_float(uint i, uint seed) +{ + return uint_to_float_excl(hash_cmj_seeded_uint(i, seed)); +} + +/* ***** Modified Wang Hash Functions ***** + * + * These are based on a bespoke modified version of the Wang hash, and + * can serve as a faster hash when quality isn't critical. + * + * The original Wang hash is documented here: + * https://www.burtleburtle.net/bob/hash/integer.html + */ + +ccl_device_inline uint hash_wang_seeded_uint(uint i, uint seed) +{ + i = (i ^ 61) ^ seed; + i += i << 3; + i ^= i >> 4; + i *= 0x27d4eb2d; + return i; +} + +/* Outputs [0.0, 1.0]. */ +ccl_device_inline float hash_wang_seeded_float(uint i, uint seed) +{ + return uint_to_float_incl(hash_wang_seeded_uint(i, seed)); +} + +/* ********** */ + #ifndef __KERNEL_GPU__ static inline uint hash_string(const char *str) { |