Cycles: experimental correlated multi-jittered sampling pattern that can be used

instead of sobol. So far one doesn't seem to be consistently better or worse than the other for the same number of samples but more testing is needed. The random number generator itself is slower than sobol for most number of samples, except 16, 64, 256, .. because they can be computed faster. This can probably be optimized, but we can do that when/if this actually turns out to be useful. Paper this implementation is based on: http://graphics.pixar.com/library/MultiJitteredSampling/ Also includes some refactoring of RNG code, fixing a Sobol correlation issue with the first BSDF and < 16 samples, skipping some unneeded RNG calls and using a simpler unit square to unit disk function.
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-07 20:06:22 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-07 20:06:22 +0400
commit: b20a7e01d046b95a79663da1a8072358709a5a8b (patch)
tree: 7772c50756a7d5a3352fd25eadbe8d41aa776615 /intern/cycles/kernel
parent: d835d2f4e65ae3bf8e22f8d3d60e7e6ea7a6b4ca (diff)
6 files changed, 371 insertions, 109 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 41048c7b379..912a1321d67 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -33,6 +33,7 @@ set(SRC_HEADERS
 	kernel_emission.h
 	kernel_film.h
 	kernel_globals.h
+	kernel_jitter.h
 	kernel_light.h
 	kernel_math.h
 	kernel_montecarlo.h
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
new file mode 100644
index 00000000000..5ea44cd0cad
--- /dev/null
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2013, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* "Correlated Multi-Jittered Sampling"
+ * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
+
+/* todo: find good value, suggested 64 gives pattern on cornell box ceiling */
+#define CMJ_RANDOM_OFFSET_LIMIT 4096
+
+__device_inline bool cmj_is_pow2(int i)
+{
+	return (i & (i - 1)) == 0;
+}
+
+__device_inline int cmj_fast_mod_pow2(int a, int b)
+{
+	return (a & (b - 1));
+}
+
+/* a must be > 0 and b must be > 1 */
+__device_inline int cmj_fast_div_pow2(int a, int b)
+{
+#ifdef __KERNEL_SSE2__
+	return a >> __builtin_ctz(b);
+#else
+	return a/b;
+#endif
+}
+
+__device_inline uint cmj_w_mask(uint w)
+{
+#ifdef __KERNEL_SSE2__
+	return ((1 << (32 - __builtin_clz(w))) - 1);
+#else
+	w |= w >> 1;
+	w |= w >> 2;
+	w |= w >> 4;
+	w |= w >> 8;
+	w |= w >> 16;
+
+	return w;
+#endif
+}
+
+__device_inline uint cmj_permute(uint i, uint l, uint p)
+{
+	uint w = l - 1;
+
+	if((l & w) == 0) {
+		/* l is a power of two (fast) */
+		i ^= p;
+		i *= 0xe170893d;
+		i ^= p >> 16;
+		i ^= (i & w) >> 4;
+		i ^= p >> 8;
+		i *= 0x0929eb3f;
+		i ^= p >> 23;
+		i ^= (i & w) >> 1;
+		i *= 1 | p >> 27;
+		i *= 0x6935fa69;
+		i ^= (i & w) >> 11;
+		i *= 0x74dcb303;
+		i ^= (i & w) >> 2;
+		i *= 0x9e501cc3;
+		i ^= (i & w) >> 2;
+		i *= 0xc860a3df;
+		i &= w;
+		i ^= i >> 5;
+
+		return (i + p) & w;
+	}
+	else {
+		/* l is not a power of two (slow) */
+		w = cmj_w_mask(w);
+
+		do {
+			i ^= p;
+			i *= 0xe170893d;
+			i ^= p >> 16;
+			i ^= (i & w) >> 4;
+			i ^= p >> 8;
+			i *= 0x0929eb3f;
+			i ^= p >> 23;
+			i ^= (i & w) >> 1;
+			i *= 1 | p >> 27;
+			i *= 0x6935fa69;
+			i ^= (i & w) >> 11;
+			i *= 0x74dcb303;
+			i ^= (i & w) >> 2;
+			i *= 0x9e501cc3;
+			i ^= (i & w) >> 2;
+			i *= 0xc860a3df;
+			i &= w;
+			i ^= i >> 5;
+		} while (i >= l);
+
+		return (i + p) % l;
+	}
+}
+
+__device_inline uint cmj_hash(uint i, uint p)
+{
+	i ^= p;
+	i ^= i >> 17;
+	i ^= i >> 10;
+	i *= 0xb36534e5;
+	i ^= i >> 12;
+	i ^= i >> 21;
+	i *= 0x93fc4795;
+	i ^= 0xdf6e307f;
+	i ^= i >> 17;
+	i *= 1 | p >> 18;
+
+	return i;
+}
+
+__device_inline float cmj_randfloat(uint i, uint p)
+{
+	return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+#ifdef __CMJ__
+__device_noinline float cmj_sample_1D(int s, int N, int p)
+{
+	uint x = cmj_permute(s, N, p * 0x68bc21eb);
+	float jx = cmj_randfloat(s, p * 0x967a889b);
+
+	float invN = 1.0f/N;
+	return (x + jx)*invN;
+}
+
+__device_noinline float2 cmj_sample_2D(int s, int N, int p)
+{
+	int m = float_to_int(sqrtf(N));
+	int n = (N + m - 1)/m;
+	float invN = 1.0f/N;
+	float invm = 1.0f/m;
+	float invn = 1.0f/n;
+
+	s = cmj_permute(s, N, p * 0x51633e2d);
+
+	int sdivm, smodm;
+
+	if(cmj_is_pow2(m)) {
+		sdivm = cmj_fast_div_pow2(s, m);
+		smodm = cmj_fast_mod_pow2(s, m);
+	}
+	else {
+		sdivm = float_to_int(s * invm);
+		smodm = s - sdivm*m;
+	}
+
+	uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
+	uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+
+	float jx = cmj_randfloat(s, p * 0x967a889b);
+	float jy = cmj_randfloat(s, p * 0x368cc8b7);
+
+	return make_float2((sx + (sy + jx)*invn)*invm, (s + jy)*invN);
+}
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index 2ae95084162..f608429da36 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -36,36 +36,12 @@
 CCL_NAMESPACE_BEGIN
 
 /// Given values x and y on [0,1], convert them in place to values on
-/// [-1,1] uniformly distributed over a unit sphere.  This code is
-/// derived from Peter Shirley, "Realistic Ray Tracing", p. 103.
+/// [-1,1] uniformly distributed over a unit sphere.
 __device void to_unit_disk(float *x, float *y)
 {
-	float r, phi;
-	float a = 2.0f * (*x) - 1.0f;
-	float b = 2.0f * (*y) - 1.0f;
-	if(a > -b) {
-		if(a > b) {
-			r = a;
-			phi = M_PI_4_F *(b/a);
-		}
-		else {
-			r = b;
-			phi = M_PI_4_F *(2.0f - a/b);
-		}
-	}
-	else {
-		if(a < b) {
-			r = -a;
-			phi = M_PI_4_F *(4.0f + b/a);
-		}
-		else {
-			r = -b;
-			if(b != 0.0f)
-				phi = M_PI_4_F *(6.0f - a/b);
-			else
-				phi = 0.0f;
-		}
-	}
+	float phi = 2.0f * M_PI_F * (*x);
+	float r = sqrtf(*y);
+
 	*x = r * cosf(phi);
 	*y = r * sinf(phi);
 }
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 5915dfed08b..866024ba303 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -233,7 +233,7 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra
 	return result;
 }
 
-__device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
 {
 	/* initialize */
 	PathRadiance L;
@@ -249,6 +249,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 #endif
 	PathState state;
 	int rng_offset = PRNG_BASE_NUM;
+	int num_samples = kernel_data.integrator.aa_samples;
 
 	path_state_init(&state);
 
@@ -270,7 +271,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 			}
 
 			extmax = kernel_data.curve_kernel_data.maximum_width;
-			lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
+			lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
 		}
 
 		bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
@@ -292,7 +293,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 			light_ray.dP = ray.dP;
 
 			/* intersect with lamp */
-			float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+			float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
 			float3 emission;
 
 			if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -323,7 +324,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 		/* setup shading */
 		ShaderData sd;
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
-		float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+		float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
 		shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
 
 		/* holdout */
@@ -373,12 +374,18 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 		 * mainly due to the mixed in MIS that we use. gives too many unneeded
 		 * shader evaluations, only need emission if we are going to terminate */
 		float probability = path_state_terminate_probability(kg, &state, throughput);
-		float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-		if(terminate >= probability)
+		if(probability == 0.0f) {
 			break;
+		}
+		else if(probability != 1.0f) {
+			float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
+
+			if(terminate >= probability)
+				break;
 
-		throughput /= probability;
+			throughput /= probability;
+		}
 
 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object, replacing
@@ -392,7 +399,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
 			/* do bssrdf scatter step if we picked a bssrdf closure */
 			if(sc) {
-				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
 				subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
 			}
 		}
@@ -402,8 +409,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
 			/* todo: solve correlation */
-			float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-			float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+			float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
+			float bsdf_u = bsdf_uv.x;
+			float bsdf_v = bsdf_uv.y;
 
 			float ao_factor = kernel_data.background.ao_factor;
 			float3 ao_N;
@@ -436,10 +444,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 		if(kernel_data.integrator.use_direct_light) {
 			/* sample illumination from lights to find path contribution */
 			if(sd.flag & SD_BSDF_HAS_EVAL) {
-				float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-				float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-				float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-				float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+				float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+				float light_o = 0.0f;
+#else
+				float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+				float2 light_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U);
+				float light_u = light_uv.x;
+				float light_v = light_uv.y;
 
 				Ray light_ray;
 				BsdfEval L_light;
@@ -471,8 +484,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 		BsdfEval bsdf_eval;
 		float3 bsdf_omega_in;
 		differential3 bsdf_domega_in;
-		float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-		float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+		float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
+		float bsdf_u = bsdf_uv.x;
+		float bsdf_v = bsdf_uv.y;
 		int label;
 
 		label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -524,8 +538,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
 #ifdef __NON_PROGRESSIVE__
 
-__device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
-	float3 throughput, float num_samples_adjust,
+__device void kernel_path_indirect(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer,
+	float3 throughput, int num_samples, int num_total_samples,
 	float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
 {
 #ifdef __LAMP_MIS__
@@ -557,7 +571,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 			light_ray.dP = ray.dP;
 
 			/* intersect with lamp */
-			float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+			float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
 			float3 emission;
 
 			if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -578,7 +592,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 		/* setup shading */
 		ShaderData sd;
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
-		float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+		float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
 		shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
 		shader_merge_closures(kg, &sd);
 
@@ -604,13 +618,19 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 		/* path termination. this is a strange place to put the termination, it's
 		 * mainly due to the mixed in MIS that we use. gives too many unneeded
 		 * shader evaluations, only need emission if we are going to terminate */
-		float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust);
-		float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
+		float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
 
-		if(terminate >= probability)
+		if(probability == 0.0f) {
 			break;
+		}
+		else if(probability != 1.0f) {
+			float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
+
+			if(terminate >= probability)
+				break;
 
-		throughput /= probability;
+			throughput /= probability;
+		}
 
 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object, replacing
@@ -624,7 +644,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 
 			/* do bssrdf scatter step if we picked a bssrdf closure */
 			if(sc) {
-				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
 				subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
 			}
 		}
@@ -634,8 +654,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
 			/* todo: solve correlation */
-			float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-			float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+			float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
+			float bsdf_u = bsdf_uv.x;
+			float bsdf_v = bsdf_uv.y;
 
 			float ao_factor = kernel_data.background.ao_factor;
 			float3 ao_N;
@@ -668,10 +689,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 		if(kernel_data.integrator.use_direct_light) {
 			/* sample illumination from lights to find path contribution */
 			if(sd.flag & SD_BSDF_HAS_EVAL) {
-				float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-				float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-				float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-				float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+				float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+				float light_o = 0.0f;
+#else
+				float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+				float2 light_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U);
+				float light_u = light_uv.x;
+				float light_v = light_uv.y;
 
 				Ray light_ray;
 				BsdfEval L_light;
@@ -704,8 +730,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 		BsdfEval bsdf_eval;
 		float3 bsdf_omega_in;
 		differential3 bsdf_domega_in;
-		float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-		float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+		float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
+		float bsdf_u = bsdf_uv.x;
+		float bsdf_v = bsdf_uv.y;
 		int label;
 
 		label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -740,15 +767,17 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 	}
 }
 
-__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample,
+__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG rng, int sample,
 	ShaderData *sd, float3 throughput, float num_samples_adjust,
 	float min_ray_pdf, float ray_pdf, PathState state,
 	int rng_offset, PathRadiance *L, __global float *buffer)
 {
+	int aa_samples = kernel_data.integrator.aa_samples;
+
 #ifdef __AO__
 	/* ambient occlusion */
 	if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
-		int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust);
+		int num_samples = ceil_to_int(kernel_data.integrator.ao_samples*num_samples_adjust);
 		float num_samples_inv = num_samples_adjust/num_samples;
 		float ao_factor = kernel_data.background.ao_factor;
 		float3 ao_N;
@@ -756,8 +785,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
 		for(int j = 0; j < num_samples; j++) {
 			/* todo: solve correlation */
-			float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-			float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+			float2 bsdf_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
+			float bsdf_u = bsdf_uv.x;
+			float bsdf_v = bsdf_uv.y;
 
 			float3 ao_D;
 			float ao_pdf;
@@ -798,15 +828,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
 		/* lamp sampling */
 		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-			int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i));
+			int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
 			float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+			RNG lamp_rng = cmj_hash(rng, i);
 
 			if(kernel_data.integrator.pdf_triangles != 0.0f)
 				num_samples_inv *= 0.5f;
 
 			for(int j = 0; j < num_samples; j++) {
-				float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-				float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+				float2 light_uv = path_rng_2D(kg, lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
+				float light_u = light_uv.x;
+				float light_v = light_uv.y;
 
 				if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
 					/* trace shadow ray */
@@ -822,16 +854,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
 		/* mesh light sampling */
 		if(kernel_data.integrator.pdf_triangles != 0.0f) {
-			int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+			int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
 			float num_samples_inv = num_samples_adjust/num_samples;
 
 			if(kernel_data.integrator.num_all_lights)
 				num_samples_inv *= 0.5f;
 
 			for(int j = 0; j < num_samples; j++) {
-				float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT);
-				float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-				float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+				float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
+				float2 light_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
+				float light_u = light_uv.x;
+				float light_v = light_uv.y;
 
 				/* only sample triangle lights */
 				if(kernel_data.integrator.num_all_lights)
@@ -869,9 +902,10 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 		else
 			num_samples = kernel_data.integrator.transmission_samples;
 
-		num_samples = ceil(num_samples_adjust*num_samples);
+		num_samples = ceil_to_int(num_samples_adjust*num_samples);
 
 		float num_samples_inv = num_samples_adjust/num_samples;
+		RNG bsdf_rng = cmj_hash(rng, i);
 
 		for(int j = 0; j < num_samples; j++) {
 			/* sample BSDF */
@@ -879,8 +913,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 			BsdfEval bsdf_eval;
 			float3 bsdf_omega_in;
 			differential3 bsdf_domega_in;
-			float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-			float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+			float2 bsdf_uv = path_rng_2D(kg, bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
+			float bsdf_u = bsdf_uv.x;
+			float bsdf_v = bsdf_uv.y;
 			int label;
 
 			label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
@@ -918,7 +953,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 #endif
 
 			kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
-				tp*num_samples_inv, num_samples,
+				tp*num_samples_inv, num_samples, aa_samples*num_samples,
 				min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
 
 			/* for render passes, sum and reset indirect light pass variables
@@ -929,7 +964,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 	}
 }
 
-__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
 {
 	/* initialize */
 	PathRadiance L;
@@ -941,6 +976,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 	float ray_pdf = 0.0f;
 	PathState state;
 	int rng_offset = PRNG_BASE_NUM;
+	int aa_samples = kernel_data.integrator.aa_samples;
 
 	path_state_init(&state);
 
@@ -961,7 +997,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 			}
 
 			extmax = kernel_data.curve_kernel_data.maximum_width;
-			lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
+			lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
 		}
 
 		if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
@@ -990,8 +1026,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 		/* setup shading */
 		ShaderData sd;
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
-		float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
-		shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+		shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
 		shader_merge_closures(kg, &sd);
 
 		/* holdout */
@@ -1031,12 +1066,18 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 			 * mainly due to the mixed in MIS that we use. gives too many unneeded
 			 * shader evaluations, only need emission if we are going to terminate */
 			float probability = path_state_terminate_probability(kg, &state, throughput);
-			float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-			if(terminate >= probability)
+			if(probability == 0.0f) {
 				break;
+			}
+			else if(probability != 1.0f) {
+				float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
 
-			throughput /= probability;
+				if(terminate >= probability)
+					break;
+
+				throughput /= probability;
+			}
 		}
 
 #ifdef __SUBSURFACE__
@@ -1049,7 +1090,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 					continue;
 
 				/* set up random number generator */
-				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
 				int num_samples = kernel_data.integrator.subsurface_samples;
 				float num_samples_inv = 1.0f/num_samples;
 
@@ -1112,22 +1153,26 @@ __device void kernel_path_trace(KernelGlobals *kg,
 
 	float filter_u;
 	float filter_v;
+	int num_samples = kernel_data.integrator.aa_samples;
 
-	path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v);
+	path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
 
 	/* sample camera ray */
 	Ray ray;
-	
+
 	float lens_u = 0.0f, lens_v = 0.0f;
-	float time = 0.0f;
-	
+
 	if(kernel_data.cam.aperturesize > 0.0f) {
-		lens_u = path_rng(kg, &rng, sample, PRNG_LENS_U);
-		lens_v = path_rng(kg, &rng, sample, PRNG_LENS_V);
+		float2 lens_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U);
+		lens_u = lens_uv.x;
+		lens_v = lens_uv.y;
 	}
+
+	float time = 0.0f;
+
 #ifdef __CAMERA_MOTION__
 	if(kernel_data.cam.shuttertime != -1.0f)
-		time = path_rng(kg, &rng, sample, PRNG_TIME);
+		time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
 #endif
 
 	camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
@@ -1139,10 +1184,10 @@ __device void kernel_path_trace(KernelGlobals *kg,
 #ifdef __NON_PROGRESSIVE__
 		if(kernel_data.integrator.progressive)
 #endif
-			L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
+			L = kernel_path_progressive(kg, rng, sample, ray, buffer);
 #ifdef __NON_PROGRESSIVE__
 		else
-			L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
+			L = kernel_path_non_progressive(kg, rng, sample, ray, buffer);
 #endif
 	}
 	else
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index fc33e226051..ecf80b817d4 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -16,6 +16,8 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include "kernel_jitter.h"
+
 CCL_NAMESPACE_BEGIN
 
 typedef uint RNG;
@@ -100,10 +102,10 @@ __device uint sobol_lookup(const uint m, const uint frame, const uint ex, const
 	return index;
 }
 
-__device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device_inline float path_rng(KernelGlobals *kg, RNG rng, int sample, int dimension)
 {
 #ifdef __SOBOL_FULL_SCREEN__
-	uint result = sobol_dimension(kg, *rng, dimension);
+	uint result = sobol_dimension(kg, rng, dimension);
 	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 	return r;
 #else
@@ -115,15 +117,44 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime
 	float shift;
 
 	if(dimension & 1)
-		shift = (*rng >> 16)/((float)0xFFFF);
+		shift = (rng >> 16)*(1.0f/(float)0xFFFF);
 	else
-		shift = (*rng & 0xFFFF)/((float)0xFFFF);
+		shift = (rng & 0xFFFF)*(1.0f/(float)0xFFFF);
 
 	return r + shift - floorf(r + shift);
 #endif
 }
 
-__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+		/* correlated multi-jittered */
+		int p = rng + dimension;
+		return cmj_sample_1D(sample, num_samples, p);
+	}
+#endif
+
+	/* sobol */
+	return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+		/* correlated multi-jittered */
+		int p = rng + dimension;
+		return cmj_sample_2D(sample, num_samples, p);
+	}
+#endif
+
+	/* sobol */
+	return make_float2(path_rng(kg, rng, sample, dimension),
+	                   path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
 	uint px, py;
@@ -153,8 +184,10 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state,
 		*fy = 0.5f;
 	}
 	else {
-		*fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-		*fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+		float2 fxy = path_rng_2D(kg, *rng, sample, num_samples, PRNG_FILTER_U);
+
+		*fx = fxy.x;
+		*fy = fxy.y;
 	}
 #endif
 }
@@ -168,14 +201,25 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng)
 
 /* Linear Congruential Generator */
 
-__device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device float path_rng(KernelGlobals *kg, RNG& rng, int sample, int dimension)
 {
 	/* implicit mod 2^32 */
-	*rng = (1103515245*(*rng) + 12345);
-	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+	rng = (1103515245*(rng) + 12345);
+	return (float)rng * (1.0f/(float)0xFFFFFFFF);
 }
 
-__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+	return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+	return make_float2(path_rng(kg, rng, sample, dimension),
+	                   path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 	/* load state */
 	*rng = *rng_state;
@@ -187,8 +231,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam
 		*fy = 0.5f;
 	}
 	else {
-		*fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-		*fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+		float2 fxy = path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U);
+
+		*fx = fxy.x;
+		*fy = fxy.y;
 	}
 }
 
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index f165a1f3839..abdb609b55f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -117,6 +117,7 @@ CCL_NAMESPACE_BEGIN
 #define __CAMERA_CLIPPING__
 #define __INTERSECTION_REFINE__
 #define __CLAMP_SAMPLE__
+#define __CMJ__
 
 #ifdef __KERNEL_SHADING__
 #define __SVM__
@@ -164,8 +165,10 @@ enum PathTraceDimension {
 	PRNG_LENS_V = 3,
 #ifdef __CAMERA_MOTION__
 	PRNG_TIME = 4,
-	PRNG_UNUSED = 5,
-	PRNG_BASE_NUM = 6,
+	PRNG_UNUSED_0 = 5,
+	PRNG_UNUSED_1 = 6,	/* for some reason (6, 7) is a bad sobol pattern */
+	PRNG_UNUSED_2 = 7,  /* with a low number of samples (< 64) */
+	PRNG_BASE_NUM = 8,
 #else
 	PRNG_BASE_NUM = 4,
 #endif
@@ -181,6 +184,11 @@ enum PathTraceDimension {
 	PRNG_BOUNCE_NUM = 8
 };
 
+enum SamplingPattern {
+	SAMPLING_PATTERN_SOBOL = 0,
+	SAMPLING_PATTERN_CMJ = 1
+};
+
 /* these flags values correspond to raytypes in osl.cpp, so keep them in sync!
  *
  * for ray visibility tests in BVH traversal, the upper 20 bits are used for
@@ -728,6 +736,7 @@ typedef struct KernelIntegrator {
 
 	/* non-progressive */
 	int progressive;
+	int aa_samples;
 	int diffuse_samples;
 	int glossy_samples;
 	int transmission_samples;
@@ -736,7 +745,11 @@ typedef struct KernelIntegrator {
 	int use_lamp_mis;
 	int subsurface_samples;
 
-	int pad1, pad2, pad3;
+	/* sampler */
+	int sampling_pattern;
+
+	/* padding */
+	int pad;
 } KernelIntegrator;
 
 typedef struct KernelBVH {
author	Brecht Van Lommel <brechtvanlommel@pandora.be>	2013-06-07 20:06:22 +0400
committer	Brecht Van Lommel <brechtvanlommel@pandora.be>	2013-06-07 20:06:22 +0400
commit	b20a7e01d046b95a79663da1a8072358709a5a8b (patch)
tree	7772c50756a7d5a3352fd25eadbe8d41aa776615 /intern/cycles/kernel
parent	d835d2f4e65ae3bf8e22f8d3d60e7e6ea7a6b4ca (diff)