git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/kernel_random.h')
-rw-r--r--  intern/cycles/kernel/kernel_random.h  354
1 file changed, 141 insertions(+), 213 deletions(-)
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 2b767da5041..e7a6134b8eb 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -14,222 +14,130 @@
* limitations under the License.
*/
-#include "kernel_jitter.h"
+#include "kernel/kernel_jitter.h"
+#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
-#ifdef __SOBOL__
-
-/* skip initial numbers that are not as well distributed, especially the
- * first sequence is just 0 everywhere, which can be problematic for e.g.
- * path termination */
-#define SOBOL_SKIP 64
-
-/* High Dimensional Sobol */
+/* Pseudo random numbers, uncomment this for debugging correlations. Only run
+ * this single threaded on a CPU for repeatable results. */
+//#define __DEBUG_CORRELATION__
-/* van der corput radical inverse */
-ccl_device uint van_der_corput(uint bits)
-{
- bits = (bits << 16) | (bits >> 16);
- bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8);
- bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4);
- bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2);
- bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1);
- return bits;
-}
-/* sobol radical inverse */
-ccl_device uint sobol(uint i)
-{
- uint r = 0;
-
- for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1)
- if(i & 1)
- r ^= v;
-
- return r;
-}
-
-/* inverse of sobol radical inverse */
-ccl_device uint sobol_inverse(uint i)
-{
- const uint msb = 1U << 31;
- uint r = 0;
-
- for(uint v = 1; i; i <<= 1, v ^= v << 1)
- if(i & msb)
- r ^= v;
+/* High Dimensional Sobol.
+ *
+ * Multidimensional sobol with generator matrices. Dimensions 0 and 1 are equal
+ * to classic Van der Corput and Sobol sequences. */
- return r;
-}
+#ifdef __SOBOL__
-/* multidimensional sobol with generator matrices
- * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively */
ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
{
uint result = 0;
uint i = index;
-
- for(uint j = 0; i; i >>= 1, j++)
- if(i & 1)
+ for(uint j = 0; i; i >>= 1, j++) {
+ if(i & 1) {
result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j);
-
+ }
+ }
return result;
}
-/* lookup index and x/y coordinate, assumes m is a power of two */
-ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, const uint ey, uint *x, uint *y)
-{
- /* shift is constant per frame */
- const uint shift = frame << (m << 1);
- const uint sobol_shift = sobol(shift);
- /* van der Corput is its own inverse */
- const uint lower = van_der_corput(ex << (32 - m));
- /* need to compensate for ey difference and shift */
- const uint sobol_lower = sobol(lower);
- const uint mask = ~-(1 << m) << (32 - m); /* only m upper bits */
- const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask;
- /* only use m upper bits for the index (m is a power of two) */
- const uint sobol_result = delta | (delta >> m);
- const uint upper = sobol_inverse(sobol_result);
- const uint index = shift | upper | lower;
- *x = van_der_corput(index);
- *y = sobol_shift ^ sobol_result ^ sobol_lower;
- return index;
-}
+#endif /* __SOBOL__ */
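
The comment above notes that dimension 0 of the generator-matrix Sobol is the classic Van der Corput sequence. A minimal standalone sketch of that claim (not part of this commit): it performs the same XOR accumulation as sobol_dimension(), but with hard-coded direction numbers 1u << (31 - j) in place of the __sobol_directions table, which reduces to the bit reversal the removed van_der_corput() computed.

#include <stdint.h>
#include <stdio.h>

static uint32_t sobol_dim0(uint32_t index)
{
	uint32_t result = 0;
	for(uint32_t j = 0; index; index >>= 1, j++) {
		if(index & 1) {
			result ^= 1u << (31 - j);  /* direction number for dimension 0 */
		}
	}
	return result;
}

int main(void)
{
	/* index 1 -> 80000000, index 2 -> 40000000, index 3 -> c0000000 */
	for(uint32_t i = 1; i <= 3; i++) {
		printf("%u -> %08x\n", (unsigned)i, (unsigned)sobol_dim0(i));
	}
	return 0;
}
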
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension)
+
+ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
+ uint rng_hash,
+ int sample, int num_samples,
+ int dimension)
{
+#ifdef __DEBUG_CORRELATION__
+ return (float)drand48();
+#endif
+
#ifdef __CMJ__
- if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
- /* correlated multi-jittered */
- int p = *rng + dimension;
+# ifdef __SOBOL__
+ if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+# endif
+ {
+ /* Correlated multi-jitter. */
+ int p = rng_hash + dimension;
return cmj_sample_1D(sample, num_samples, p);
}
#endif
-#ifdef __SOBOL_FULL_SCREEN__
- uint result = sobol_dimension(kg, *rng, dimension);
- float r = (float)result * (1.0f/(float)0xFFFFFFFF);
- return r;
-#else
- /* compute sobol sequence value using direction vectors */
- uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension);
+#ifdef __SOBOL__
+ /* Sobol sequence value using direction vectors. */
+ uint result = sobol_dimension(kg, sample, dimension);
float r = (float)result * (1.0f/(float)0xFFFFFFFF);
/* Cranley-Patterson rotation using rng seed */
float shift;
- /* using the same *rng value to offset seems to give correlation issues,
- * we could hash it with the dimension but this has a performance impact,
- * we need to find a solution for this */
- if(dimension & 1)
- shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
- else
- shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF);
+ /* Hash rng with dimension to solve correlation issues.
+ * See T38710, T50116.
+ */
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
return r + shift - floorf(r + shift);
#endif
}
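
path_rng_1D() decorrelates pixels with a Cranley-Patterson rotation: the Sobol value is offset by a hashed per-pixel shift and wrapped back into [0, 1). A minimal standalone sketch of just that wrap, with assumed example values rather than kernel state:

#include <math.h>
#include <stdio.h>

static float cranley_patterson_rotate(float r, float shift)
{
	/* frac(r + shift): keeps stratification, shifts the whole sequence */
	return r + shift - floorf(r + shift);
}

int main(void)
{
	float r = 0.75f;      /* value from the low-discrepancy sequence */
	float shift = 0.40f;  /* per-pixel offset derived from the hash */
	printf("%f\n", cranley_patterson_rotate(r, shift));  /* ~0.15 */
	return 0;
}
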
-ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
+ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
+ uint rng_hash,
+ int sample, int num_samples,
+ int dimension,
+ float *fx, float *fy)
{
+#ifdef __DEBUG_CORRELATION__
+ *fx = (float)drand48();
+ *fy = (float)drand48();
+ return;
+#endif
+
#ifdef __CMJ__
- if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
- /* correlated multi-jittered */
- int p = *rng + dimension;
+# ifdef __SOBOL__
+ if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+# endif
+ {
+ /* Correlated multi-jitter. */
+ int p = rng_hash + dimension;
cmj_sample_2D(sample, num_samples, p, fx, fy);
+ return;
}
- else
#endif
- {
- /* sobol */
- *fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
- *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
- }
-}
-
-ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, ccl_addr_space RNG *rng, int x, int y, float *fx, float *fy)
-{
-#ifdef __SOBOL_FULL_SCREEN__
- uint px, py;
- uint bits = 16; /* limits us to 65536x65536 and 65536 samples */
- uint size = 1 << bits;
- uint frame = sample;
-
- *rng = sobol_lookup(bits, frame, x, y, &px, &py);
-
- *rng ^= kernel_data.integrator.seed;
-
- if(sample == 0) {
- *fx = 0.5f;
- *fy = 0.5f;
- }
- else {
- *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x;
- *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y;
- }
-#else
- *rng = *rng_state;
- *rng ^= kernel_data.integrator.seed;
-
- if(sample == 0) {
- *fx = 0.5f;
- *fy = 0.5f;
- }
- else {
- path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy);
- }
+#ifdef __SOBOL__
+ /* Sobol. */
+ *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
+ *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
#endif
}
-ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng)
-{
- /* nothing to do */
-}
-
-#else
-
-/* Linear Congruential Generator */
-
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
-{
- /* implicit mod 2^32 */
- rng = (1103515245*(rng) + 12345);
- return (float)rng * (1.0f/(float)0xFFFFFFFF);
-}
-
-ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension, float *fx, float *fy)
-{
- *fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
- *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
-}
-
-ccl_device void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
+ccl_device_inline void path_rng_init(KernelGlobals *kg,
+ int sample, int num_samples,
+ uint *rng_hash,
+ int x, int y,
+ float *fx, float *fy)
{
/* load state */
- *rng = *rng_state;
+ *rng_hash = hash_int_2d(x, y);
+ *rng_hash ^= kernel_data.integrator.seed;
- *rng ^= kernel_data.integrator.seed;
+#ifdef __DEBUG_CORRELATION__
+ srand48(*rng_hash + sample);
+#endif
if(sample == 0) {
*fx = 0.5f;
*fy = 0.5f;
}
else {
- path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy);
+ path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
}
}
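
path_rng_init() seeds each pixel by hashing (x, y) and XORing the integrator seed, so every pixel gets a different but deterministic rng_hash. A standalone sketch of that idea, using a hypothetical toy integer mix in place of Cycles' hash_int_2d():

#include <stdint.h>
#include <stdio.h>

static uint32_t toy_hash_2d(uint32_t x, uint32_t y)
{
	/* any reasonable integer mix works for the illustration */
	uint32_t h = x * 73856093u ^ y * 19349663u;
	h ^= h >> 16;
	h *= 0x45d9f3bu;
	h ^= h >> 16;
	return h;
}

int main(void)
{
	const uint32_t seed = 0xCAFEBABEu;  /* stands in for kernel_data.integrator.seed */
	for(uint32_t y = 0; y < 2; y++) {
		for(uint32_t x = 0; x < 2; x++) {
			printf("pixel (%u, %u) -> rng_hash %08x\n",
			       (unsigned)x, (unsigned)y,
			       (unsigned)(toy_hash_2d(x, y) ^ seed));
		}
	}
	return 0;
}
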
-ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng)
-{
- /* store state for next sample */
- *rng_state = rng;
-}
-
-#endif
-
/* Linear Congruential Generator */
ccl_device uint lcg_step_uint(uint *rng)
@@ -259,90 +167,110 @@ ccl_device uint lcg_init(uint seed)
* dimension to avoid using the same sequence twice.
*
* For branches in the path we must be careful not to reuse the same number
- * in a sequence and offset accordingly. */
-
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
-{
- return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
-}
-
-ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
-{
- /* the rng_offset is not increased for transparent bounces. if we do then
- * fully transparent objects can become subtly visible by the different
- * sampling patterns used where the transparent object is.
- *
- * however for some random numbers that will determine if we next bounce
- * is transparent we do need to increase the offset to avoid always making
- * the same decision */
- int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
- return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
-}
+ * in a sequence and offset accordingly.
+ */
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
+ const ccl_addr_space PathState *state,
+ int dimension)
{
- path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
+ return path_rng_1D(kg,
+ state->rng_hash,
+ state->sample, state->num_samples,
+ state->rng_offset + dimension);
}
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg,
+ const ccl_addr_space PathState *state,
+ int dimension,
+ float *fx, float *fy)
{
- return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
+ path_rng_2D(kg,
+ state->rng_hash,
+ state->sample, state->num_samples,
+ state->rng_offset + dimension,
+ fx, fy);
}
-ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(
+ KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches,
+ int dimension)
{
- int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
- return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
+ return path_rng_1D(kg,
+ rng_hash,
+ state->sample * num_branches + branch,
+ state->num_samples * num_branches,
+ state->rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(
+ KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches,
+ int dimension,
+ float *fx, float *fy)
{
- path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
+ path_rng_2D(kg,
+ rng_hash,
+ state->sample * num_branches + branch,
+ state->num_samples * num_branches,
+ state->rng_offset + dimension,
+ fx, fy);
}
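
The branched variants above remap the sample index as sample * num_branches + branch over num_samples * num_branches total samples, so each branch draws from its own slot of the sequence and never reuses a number. A small host-side sketch of that remapping, for illustration only:

#include <stdio.h>

int main(void)
{
	const int num_samples = 2, num_branches = 3;
	for(int sample = 0; sample < num_samples; sample++) {
		for(int branch = 0; branch < num_branches; branch++) {
			/* unique index per (sample, branch) pair */
			int index = sample * num_branches + branch;
			printf("sample %d branch %d -> index %d of %d\n",
			       sample, branch, index, num_samples * num_branches);
		}
	}
	return 0;
}
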
-/* Utitility functions to get light termination value, since it might not be needed in many cases. */
-ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state)
+/* Utility functions to get light termination value,
+ * since it might not be needed in many cases.
+ */
+ccl_device_inline float path_state_rng_light_termination(
+ KernelGlobals *kg,
+ const ccl_addr_space PathState *state)
{
if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
- return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE);
+ return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
}
return 0.0f;
}
-ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches)
+ccl_device_inline float path_branched_rng_light_termination(
+ KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches)
{
if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
- return path_branched_rng_1D_for_decision(kg, rng, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
+ return path_branched_rng_1D(kg,
+ rng_hash,
+ state,
+ branch,
+ num_branches,
+ PRNG_LIGHT_TERMINATE);
}
return 0.0f;
}
-ccl_device_inline void path_state_branch(PathState *state, int branch, int num_branches)
-{
- /* path is splitting into a branch, adjust so that each branch
- * still gets a unique sample from the same sequence */
- state->rng_offset += PRNG_BOUNCE_NUM;
- state->sample = state->sample*num_branches + branch;
- state->num_samples = state->num_samples*num_branches;
-}
-
-ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
+ccl_device_inline uint lcg_state_init(PathState *state,
+ uint scramble)
{
- return lcg_init(*rng + state->rng_offset + state->sample*scramble);
+ return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
}
-/* TODO(sergey): For until we can use generic address space from OpenCL 2.0. */
-
-ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space RNG *rng,
- const ccl_addr_space PathState *state,
+ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state,
uint scramble)
{
- return lcg_init(*rng + state->rng_offset + state->sample*scramble);
+ return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
}
+
ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
{
- /* implicit mod 2^32 */
+ /* Implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
return (float)*rng * (1.0f/(float)0xFFFFFFFF);
}
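
lcg_step_float_addrspace() relies on 32-bit unsigned overflow for the "implicit mod 2^32". A standalone sketch of the same step outside the kernel, with an assumed seed value:

#include <stdint.h>
#include <stdio.h>

static float lcg_step(uint32_t *rng)
{
	/* multiply-add wraps modulo 2^32 because the state is 32-bit unsigned */
	*rng = 1103515245u * (*rng) + 12345u;
	/* scale to [0, 1] the same way the kernel does */
	return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}

int main(void)
{
	uint32_t rng = 42;  /* arbitrary seed */
	for(int i = 0; i < 4; i++) {
		printf("%f\n", lcg_step(&rng));
	}
	return 0;
}
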