diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_random.h')
-rw-r--r-- | intern/cycles/kernel/kernel_random.h | 228 |
1 files changed, 63 insertions, 165 deletions
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 49e5e25c2e0..240c92bf9d0 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once #include "kernel/kernel_jitter.h" #include "util/util_hash.h" @@ -37,38 +38,34 @@ CCL_NAMESPACE_BEGIN */ # define SOBOL_SKIP 64 -ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) +ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimension) { uint result = 0; uint i = index + SOBOL_SKIP; for (int j = 0, x; (x = find_first_set(i)); i >>= x) { j += x; - result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1); + result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1)); } return result; } #endif /* __SOBOL__ */ -ccl_device_forceinline float path_rng_1D( - KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension) +ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg, + uint rng_hash, + int sample, + int dimension) { #ifdef __DEBUG_CORRELATION__ return (float)drand48(); #endif - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { - return pmj_sample_1D(kg, sample, rng_hash, dimension); - } -#ifdef __CMJ__ -# ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) -# endif + +#ifdef __SOBOL__ + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) +#endif { - /* Correlated multi-jitter. */ - int p = rng_hash + dimension; - return cmj_sample_1D(sample, num_samples, p); + return pmj_sample_1D(kg, sample, rng_hash, dimension); } -#endif #ifdef __SOBOL__ /* Sobol sequence value using direction vectors. */ @@ -88,68 +85,72 @@ ccl_device_forceinline float path_rng_1D( #endif } -ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, - uint rng_hash, - int sample, - int num_samples, - int dimension, - float *fx, - float *fy) +ccl_device_forceinline void path_rng_2D( + const KernelGlobals *kg, uint rng_hash, int sample, int dimension, float *fx, float *fy) { #ifdef __DEBUG_CORRELATION__ *fx = (float)drand48(); *fy = (float)drand48(); return; #endif - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { - const float2 f = pmj_sample_2D(kg, sample, rng_hash, dimension); - *fx = f.x; - *fy = f.y; - return; - } -#ifdef __CMJ__ -# ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) -# endif + +#ifdef __SOBOL__ + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) +#endif { - /* Correlated multi-jitter. */ - int p = rng_hash + dimension; - cmj_sample_2D(sample, num_samples, p, fx, fy); + pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy); + return; } -#endif #ifdef __SOBOL__ /* Sobol. */ - *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1); + *fx = path_rng_1D(kg, rng_hash, sample, dimension); + *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1); #endif } -ccl_device_inline void path_rng_init(KernelGlobals *kg, - int sample, - int num_samples, - uint *rng_hash, - int x, - int y, - float *fx, - float *fy) +/** + * 1D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020 + * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh + * http://www.jcgt.org/published/0009/03/02/paper.pdf + */ +ccl_device_inline uint hash_iqint1(uint n) +{ + n = (n << 13U) ^ n; + n = n * (n * n * 15731U + 789221U) + 1376312589U; + + return n; +} + +/** + * 2D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020 + * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh + * http://www.jcgt.org/published/0009/03/02/paper.pdf + */ +ccl_device_inline uint hash_iqnt2d(const uint x, const uint y) { - /* load state */ - *rng_hash = hash_uint2(x, y); - *rng_hash ^= kernel_data.integrator.seed; + const uint qx = 1103515245U * ((x >> 1U) ^ (y)); + const uint qy = 1103515245U * ((y >> 1U) ^ (x)); + const uint n = 1103515245U * ((qx) ^ (qy >> 3U)); + + return n; +} + +ccl_device_inline uint path_rng_hash_init(const KernelGlobals *ccl_restrict kg, + const int sample, + const int x, + const int y) +{ + const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed; #ifdef __DEBUG_CORRELATION__ - srand48(*rng_hash + sample); + srand48(rng_hash + sample); +#else + (void)sample; #endif - if (sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy); - } + return rng_hash; } /* Linear Congruential Generator */ @@ -175,113 +176,12 @@ ccl_device uint lcg_init(uint seed) return rng; } -/* Path Tracing Utility Functions - * - * For each random number in each step of the path we must have a unique - * dimension to avoid using the same sequence twice. - * - * For branches in the path we must be careful not to reuse the same number - * in a sequence and offset accordingly. - */ - -ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, - const ccl_addr_space PathState *state, - int dimension) -{ - return path_rng_1D( - kg, state->rng_hash, state->sample, state->num_samples, state->rng_offset + dimension); -} - -ccl_device_inline void path_state_rng_2D( - KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy) -{ - path_rng_2D(kg, - state->rng_hash, - state->sample, - state->num_samples, - state->rng_offset + dimension, - fx, - fy); -} - -ccl_device_inline float path_state_rng_1D_hash(KernelGlobals *kg, - const ccl_addr_space PathState *state, - uint hash) -{ - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. */ - return path_rng_1D(kg, - cmj_hash_simple(state->rng_hash, hash), - state->sample, - state->num_samples, - state->rng_offset); -} - -ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches, - int dimension) -{ - return path_rng_1D(kg, - rng_hash, - state->sample * num_branches + branch, - state->num_samples * num_branches, - state->rng_offset + dimension); -} - -ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches, - int dimension, - float *fx, - float *fy) -{ - path_rng_2D(kg, - rng_hash, - state->sample * num_branches + branch, - state->num_samples * num_branches, - state->rng_offset + dimension, - fx, - fy); -} - -/* Utility functions to get light termination value, - * since it might not be needed in many cases. - */ -ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, - const ccl_addr_space PathState *state) -{ - if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); - } - return 0.0f; -} - -ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches) -{ - if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_branched_rng_1D(kg, rng_hash, state, branch, num_branches, PRNG_LIGHT_TERMINATE); - } - return 0.0f; -} - -ccl_device_inline uint lcg_state_init(PathState *state, uint scramble) -{ - return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble); -} - -ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble) +ccl_device_inline uint lcg_state_init(const uint rng_hash, + const uint rng_offset, + const uint sample, + const uint scramble) { - return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble); + return lcg_init(rng_hash + rng_offset + sample * scramble); } ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng) @@ -301,8 +201,6 @@ ccl_device_inline bool sample_is_even(int pattern, int sample) return __builtin_popcount(sample & 0xaaaaaaaa) & 1; #elif defined(__NVCC__) return __popc(sample & 0xaaaaaaaa) & 1; -#elif defined(__KERNEL_OPENCL__) - return popcount(sample & 0xaaaaaaaa) & 1; #else /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */ int i = sample & 0xaaaaaaaa; |