diff options
author | Lukas Tönne <lukas.toenne@gmail.com> | 2017-10-16 12:16:13 +0300 |
---|---|---|
committer | Lukas Tönne <lukas.toenne@gmail.com> | 2017-10-16 12:22:35 +0300 |
commit | a78b3ee53aa53020b086a6df25c0e28491223dcc (patch) | |
tree | bd883e95580f5777f7eae7cac4e47f182ac9fc00 /intern/cycles/kernel/kernel_random.h | |
parent | 4842cc017c3bb7df2070c2f96605190ff88e6a2e (diff) | |
parent | 49f4ac17bf704614de59a4db7a65c205c085d694 (diff) |
Merge remote-tracking branch 'origin/master' into openvdb
Diffstat (limited to 'intern/cycles/kernel/kernel_random.h')
-rw-r--r-- | intern/cycles/kernel/kernel_random.h | 354 |
1 files changed, 141 insertions, 213 deletions
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 2b767da5041..e7a6134b8eb 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -14,222 +14,130 @@ * limitations under the License. */ -#include "kernel_jitter.h" +#include "kernel/kernel_jitter.h" +#include "util/util_hash.h" CCL_NAMESPACE_BEGIN -#ifdef __SOBOL__ - -/* skip initial numbers that are not as well distributed, especially the - * first sequence is just 0 everywhere, which can be problematic for e.g. - * path termination */ -#define SOBOL_SKIP 64 - -/* High Dimensional Sobol */ +/* Pseudo random numbers, uncomment this for debugging correlations. Only run + * this single threaded on a CPU for repeatable results. */ +//#define __DEBUG_CORRELATION__ -/* van der corput radical inverse */ -ccl_device uint van_der_corput(uint bits) -{ - bits = (bits << 16) | (bits >> 16); - bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8); - bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4); - bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2); - bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1); - return bits; -} -/* sobol radical inverse */ -ccl_device uint sobol(uint i) -{ - uint r = 0; - - for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1) - if(i & 1) - r ^= v; - - return r; -} - -/* inverse of sobol radical inverse */ -ccl_device uint sobol_inverse(uint i) -{ - const uint msb = 1U << 31; - uint r = 0; - - for(uint v = 1; i; i <<= 1, v ^= v << 1) - if(i & msb) - r ^= v; +/* High Dimensional Sobol. + * + * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal + * to classic Van der Corput and Sobol sequences. 
*/ - return r; -} +#ifdef __SOBOL__ -/* multidimensional sobol with generator matrices - * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively */ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) { uint result = 0; uint i = index; - - for(uint j = 0; i; i >>= 1, j++) - if(i & 1) + for(uint j = 0; i; i >>= 1, j++) { + if(i & 1) { result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j); - + } + } return result; } -/* lookup index and x/y coordinate, assumes m is a power of two */ -ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, const uint ey, uint *x, uint *y) -{ - /* shift is constant per frame */ - const uint shift = frame << (m << 1); - const uint sobol_shift = sobol(shift); - /* van der Corput is its own inverse */ - const uint lower = van_der_corput(ex << (32 - m)); - /* need to compensate for ey difference and shift */ - const uint sobol_lower = sobol(lower); - const uint mask = ~-(1 << m) << (32 - m); /* only m upper bits */ - const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask; - /* only use m upper bits for the index (m is a power of two) */ - const uint sobol_result = delta | (delta >> m); - const uint upper = sobol_inverse(sobol_result); - const uint index = shift | upper | lower; - *x = van_der_corput(index); - *y = sobol_shift ^ sobol_result ^ sobol_lower; - return index; -} +#endif /* __SOBOL__ */ -ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension) + +ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, + uint rng_hash, + int sample, int num_samples, + int dimension) { +#ifdef __DEBUG_CORRELATION__ + return (float)drand48(); +#endif + #ifdef __CMJ__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { - /* correlated multi-jittered */ - int p = *rng + dimension; +# ifdef __SOBOL__ + if(kernel_data.integrator.sampling_pattern == 
SAMPLING_PATTERN_CMJ) +# endif + { + /* Correlated multi-jitter. */ + int p = rng_hash + dimension; return cmj_sample_1D(sample, num_samples, p); } #endif -#ifdef __SOBOL_FULL_SCREEN__ - uint result = sobol_dimension(kg, *rng, dimension); - float r = (float)result * (1.0f/(float)0xFFFFFFFF); - return r; -#else - /* compute sobol sequence value using direction vectors */ - uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension); +#ifdef __SOBOL__ + /* Sobol sequence value using direction vectors. */ + uint result = sobol_dimension(kg, sample, dimension); float r = (float)result * (1.0f/(float)0xFFFFFFFF); /* Cranley-Patterson rotation using rng seed */ float shift; - /* using the same *rng value to offset seems to give correlation issues, - * we could hash it with the dimension but this has a performance impact, - * we need to find a solution for this */ - if(dimension & 1) - shift = (*rng >> 16) * (1.0f/(float)0xFFFF); - else - shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF); + /* Hash rng with dimension to solve correlation issues. + * See T38710, T50116. + */ + uint tmp_rng = cmj_hash_simple(dimension, rng_hash); + shift = tmp_rng * (1.0f/(float)0xFFFFFFFF); return r + shift - floorf(r + shift); #endif } -ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy) +ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, + uint rng_hash, + int sample, int num_samples, + int dimension, + float *fx, float *fy) { +#ifdef __DEBUG_CORRELATION__ + *fx = (float)drand48(); + *fy = (float)drand48(); + return; +#endif + #ifdef __CMJ__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { - /* correlated multi-jittered */ - int p = *rng + dimension; +# ifdef __SOBOL__ + if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) +# endif + { + /* Correlated multi-jitter. 
*/ + int p = rng_hash + dimension; cmj_sample_2D(sample, num_samples, p, fx, fy); + return; } - else #endif - { - /* sobol */ - *fx = path_rng_1D(kg, rng, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1); - } -} - -ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, ccl_addr_space RNG *rng, int x, int y, float *fx, float *fy) -{ -#ifdef __SOBOL_FULL_SCREEN__ - uint px, py; - uint bits = 16; /* limits us to 65536x65536 and 65536 samples */ - uint size = 1 << bits; - uint frame = sample; - - *rng = sobol_lookup(bits, frame, x, y, &px, &py); - - *rng ^= kernel_data.integrator.seed; - - if(sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x; - *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y; - } -#else - *rng = *rng_state; - *rng ^= kernel_data.integrator.seed; - - if(sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy); - } +#ifdef __SOBOL__ + /* Sobol. 
*/ + *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension); + *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1); #endif } -ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng) -{ - /* nothing to do */ -} - -#else - -/* Linear Congruential Generator */ - -ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension) -{ - /* implicit mod 2^32 */ - rng = (1103515245*(rng) + 12345); - return (float)rng * (1.0f/(float)0xFFFFFFFF); -} - -ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension, float *fx, float *fy) -{ - *fx = path_rng_1D(kg, rng, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1); -} - -ccl_device void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy) +ccl_device_inline void path_rng_init(KernelGlobals *kg, + int sample, int num_samples, + uint *rng_hash, + int x, int y, + float *fx, float *fy) { /* load state */ - *rng = *rng_state; + *rng_hash = hash_int_2d(x, y); + *rng_hash ^= kernel_data.integrator.seed; - *rng ^= kernel_data.integrator.seed; +#ifdef __DEBUG_CORRELATION__ + srand48(*rng_hash + sample); +#endif if(sample == 0) { *fx = 0.5f; *fy = 0.5f; } else { - path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy); + path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy); } } -ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng) -{ - /* store state for next sample */ - *rng_state = rng; -} - -#endif - /* Linear Congruential Generator */ ccl_device uint lcg_step_uint(uint *rng) @@ -259,90 +167,110 @@ ccl_device uint lcg_init(uint seed) * dimension to avoid using the same sequence twice. 
* * For branches in the path we must be careful not to reuse the same number - * in a sequence and offset accordingly. */ - -ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension) -{ - return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension); -} - -ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension) -{ - /* the rng_offset is not increased for transparent bounces. if we do then - * fully transparent objects can become subtly visible by the different - * sampling patterns used where the transparent object is. - * - * however for some random numbers that will determine if we next bounce - * is transparent we do need to increase the offset to avoid always making - * the same decision */ - int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM; - return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension); -} + * in a sequence and offset accordingly. 
+ */ -ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy) +ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, + const ccl_addr_space PathState *state, + int dimension) { - path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy); + return path_rng_1D(kg, + state->rng_hash, + state->sample, state->num_samples, + state->rng_offset + dimension); } -ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension) +ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, + const ccl_addr_space PathState *state, + int dimension, + float *fx, float *fy) { - return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension); + path_rng_2D(kg, + state->rng_hash, + state->sample, state->num_samples, + state->rng_offset + dimension, + fx, fy); } -ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension) +ccl_device_inline float path_branched_rng_1D( + KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches, + int dimension) { - int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM; - return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension); + return path_rng_1D(kg, + rng_hash, + state->sample * num_branches + branch, + state->num_samples * num_branches, + state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy) +ccl_device_inline void path_branched_rng_2D( + 
KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches, + int dimension, + float *fx, float *fy) { - path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy); + path_rng_2D(kg, + rng_hash, + state->sample * num_branches + branch, + state->num_samples * num_branches, + state->rng_offset + dimension, + fx, fy); } -/* Utility functions to get light termination value, since it might not be needed in many cases. */ -ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state) +/* Utility functions to get light termination value, + * since it might not be needed in many cases. + */ +ccl_device_inline float path_state_rng_light_termination( + KernelGlobals *kg, + const ccl_addr_space PathState *state) { if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE); + return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); } return 0.0f; } -ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches) +ccl_device_inline float path_branched_rng_light_termination( + KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches) { if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_branched_rng_1D_for_decision(kg, rng, state, branch, num_branches, PRNG_LIGHT_TERMINATE); + return path_branched_rng_1D(kg, + rng_hash, + state, + branch, + num_branches, + PRNG_LIGHT_TERMINATE); } return 0.0f; } -ccl_device_inline void path_state_branch(PathState *state, int branch, int num_branches) -{ - /* path is splitting into a branch, adjust so that each branch - * still gets a unique sample from the same sequence */ - state->rng_offset += 
PRNG_BOUNCE_NUM; - state->sample = state->sample*num_branches + branch; - state->num_samples = state->num_samples*num_branches; -} - -ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble) +ccl_device_inline uint lcg_state_init(PathState *state, + uint scramble) { - return lcg_init(*rng + state->rng_offset + state->sample*scramble); + return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble); } -/* TODO(sergey): For until we can use generic address space from OpenCL 2.0. */ - -ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space RNG *rng, - const ccl_addr_space PathState *state, +ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble) { - return lcg_init(*rng + state->rng_offset + state->sample*scramble); + return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble); } + ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng) { - /* implicit mod 2^32 */ + /* Implicit mod 2^32 */ *rng = (1103515245*(*rng) + 12345); return (float)*rng * (1.0f/(float)0xFFFFFFFF); } |