Fix T40370: cycles CUDA baking timeout with high number of AA samples.

Baking now does one AA sample at a time, just like the final render. There is also some code for shader antialiasing that solves T40369, but it is disabled for now because there may be unpredictable side effects.
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-06 16:40:09 +0400
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-06 17:39:04 +0400
commit: e4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch)
tree: cc38ac39838bec84d28de396374ba022139a8aa2 /intern/cycles/kernel
parent: 553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff)
9 files changed, 152 insertions, 106 deletions
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index d7d3438036e..2e0a49435a8 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
 	ccl_global type *name,
 #include "kernel_textures.h"
 
-	int type, int sx, int sw)
+	int type, int sx, int sw, int sample)
 {
 	KernelGlobals kglobals, *kg = &kglobals;
 
@@ -128,7 +128,7 @@ __kernel void kernel_ocl_shader(
 	int x = sx + get_global_id(0);
 
 	if(x < sx + sw)
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
 }
 
 __kernel void kernel_ocl_bake(
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake(
 	ccl_global type *name,
 #include "kernel_textures.h"
 
-	int type, int sx, int sw)
+	int type, int sx, int sw, int sample)
 {
 	KernelGlobals kglobals, *kg = &kglobals;
 
@@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake(
 	int x = sx + get_global_id(0);
 
 	if(x < sx + sw)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
 }
 
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index a535659b3b1..42eb9a62518 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -120,12 +120,12 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
 
 /* Shader Evaluation */
 
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
 {
 	if(type >= SHADER_EVAL_BAKE)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 	else
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 12273cc2853..83b1381a0f5 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -146,22 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
 {
 	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 
 	if(x < sx + sw)
-		kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+		kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
 }
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
 {
 	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 
 	if(x < sx + sw)
-		kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+		kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
 }
 
 #endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index c4a08646bab..b169b15b9b5 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 	float sample_scale, int x, int y, int offset, int stride);
 void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
-	int type, int i);
+	int type, int i, int sample);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
 void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 	float sample_scale, int x, int y, int offset, int stride);
 void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
-	int type, int i);
+	int type, int i, int sample);
 #endif
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
 void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 	float sample_scale, int x, int y, int offset, int stride);
 void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
-	int type, int i);
+	int type, int i, int sample);
 #endif
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
 void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 	float sample_scale, int x, int y, int offset, int stride);
 void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
-	int type, int i);
+	int type, int i, int sample);
 #endif
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
 void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
 	float sample_scale, int x, int y, int offset, int stride);
 void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
-	int type, int i);
+	int type, int i, int sample);
 #endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index 7d354de16d2..f5e1b8a7bb7 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
 
 /* Shader Evaluate */
 
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
 {
 	if(type >= SHADER_EVAL_BAKE)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 	else
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index e31090fe9ba..e8845e03acb 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -17,108 +17,114 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
-                                   const bool is_combined, const bool is_ao, const bool is_sss)
+                                   const bool is_combined, const bool is_ao, const bool is_sss, int sample)
 {
-	int samples = kernel_data.integrator.aa_samples;
-
 	/* initialize master radiance accumulator */
 	kernel_assert(kernel_data.film.use_light_pass);
 	path_radiance_init(L, kernel_data.film.use_light_pass);
 
-	/* take multiple samples */
-	for(int sample = 0; sample < samples; sample++) {
-		PathRadiance L_sample;
-		PathState state;
-		Ray ray;
-		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-		bool is_sss_sample = is_sss;
+	PathRadiance L_sample;
+	PathState state;
+	Ray ray;
+	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+	bool is_sss_sample = is_sss;
 
-		/* init radiance */
-		path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+	/* init radiance */
+	path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
 
-		/* init path state */
-		path_state_init(kg, &state, &rng, sample);
-		state.num_samples = samples;
+	/* init path state */
+	path_state_init(kg, &state, &rng, sample);
+	state.num_samples = kernel_data.integrator.aa_samples;
 
-		/* evaluate surface shader */
-		float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
-		shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+	/* evaluate surface shader */
+	float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
+	shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
 
-		/* TODO, disable the closures we won't need */
+	/* TODO, disable the closures we won't need */
 
 #ifdef __BRANCHED_PATH__
-		if(!kernel_data.integrator.branched) {
-			/* regular path tracer */
+	if(!kernel_data.integrator.branched) {
+		/* regular path tracer */
 #endif
 
-			/* sample ambient occlusion */
-			if(is_combined || is_ao) {
-				kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
-			}
+		/* sample ambient occlusion */
+		if(is_combined || is_ao) {
+			kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+		}
 
 #ifdef __SUBSURFACE__
-			/* sample subsurface scattering */
-			if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
-				/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-				if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
-					is_sss_sample = true;
-			}
+		/* sample subsurface scattering */
+		if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+			if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+				is_sss_sample = true;
+		}
 #endif
 
-			/* sample light and BSDF */
-			if((!is_sss_sample) && (!is_ao)) {
+		/* sample light and BSDF */
+		if((!is_sss_sample) && (!is_ao)) {
 
-				if(sd->flag & SD_EMISSION) {
-					float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-					path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
-				}
+			if(sd->flag & SD_EMISSION) {
+				float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+				path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
+			}
 
-				if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
+			if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
 #ifdef __LAMP_MIS__
-					state.ray_t = 0.0f;
+				state.ray_t = 0.0f;
 #endif
-					/* compute indirect light */
-					kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample);
+				/* compute indirect light */
+				kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
 
-					/* sum and reset indirect light pass variables for the next samples */
-					path_radiance_sum_indirect(&L_sample);
-					path_radiance_reset_indirect(&L_sample);
-				}
+				/* sum and reset indirect light pass variables for the next samples */
+				path_radiance_sum_indirect(&L_sample);
+				path_radiance_reset_indirect(&L_sample);
 			}
-#ifdef __BRANCHED_PATH__
 		}
-		else {
-			/* branched path tracer */
+#ifdef __BRANCHED_PATH__
+	}
+	else {
+		/* branched path tracer */
 
-			/* sample ambient occlusion */
-			if(is_combined || is_ao) {
-				kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
-			}
+		/* sample ambient occlusion */
+		if(is_combined || is_ao) {
+			kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+		}
 
 #ifdef __SUBSURFACE__
-			/* sample subsurface scattering */
-			if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
-				/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-				kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
-			}
+		/* sample subsurface scattering */
+		if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+			kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
+		}
 #endif
 
-			/* sample light and BSDF */
-			if((!is_sss_sample) && (!is_ao)) {
-
-				if(sd->flag & SD_EMISSION) {
-					float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-					path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
-				}
+		/* sample light and BSDF */
+		if((!is_sss_sample) && (!is_ao)) {
 
-				kernel_branched_path_integrate_lighting(kg, &rng,
-					sd, throughput, 1.0f, &state, &L_sample);
+			if(sd->flag & SD_EMISSION) {
+				float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+				path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
 			}
+
+			kernel_branched_path_integrate_lighting(kg, &rng,
+				sd, throughput, 1.0f, &state, &L_sample);
 		}
+	}
 #endif
 
-		/* accumulate into master L */
-		path_radiance_accum_sample(L, &L_sample, samples);
+	/* accumulate into master L */
+	path_radiance_accum_sample(L, &L_sample, 1);
+}
+
+ccl_device bool is_aa_pass(ShaderEvalType type)
+{
+	switch(type) {
+		case SHADER_EVAL_UV:
+		case SHADER_EVAL_NORMAL:
+			return false;
+		default:
+			return true;
 	}
 }
 
@@ -142,7 +148,20 @@ ccl_device bool is_light_pass(ShaderEvalType type)
 	}
 }
 
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+#if 0
+ccl_device_inline float bake_clamp_mirror_repeat(float u)
+{
+	/* use mirror repeat (like opengl texture) so that if the barycentric
+	 * coordinate goes past the end of the triangle it is not always clamped
+	 * to the same value, gives ugly patterns */
+	float fu = floorf(u);
+	u = u - fu;
+
+	return (((int)fu) & 1)? 1.0f - u: u;
+}
+#endif
+
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
 {
 	ShaderData sd;
 	uint4 in = input[i * 2];
@@ -164,6 +183,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 	float dvdx = __uint_as_float(diff.z);
 	float dvdy = __uint_as_float(diff.w);
 
+	int num_samples = kernel_data.integrator.aa_samples;
+
+	/* random number generator */
+	RNG rng = cmj_hash(i, 0);
+
+#if 0
+	uint rng_state = cmj_hash(i, 0);
+	float filter_x, filter_y;
+	path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y);
+
+	/* subpixel u/v offset */
+	if(sample > 0) {
+		u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f));
+		v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f));
+	}
+#endif
+
+	/* triangle */
 	int shader;
 	float3 P, Ng;
 
@@ -190,12 +227,14 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 	sd.dv.dx = dvdx;
 	sd.dv.dy = dvdy;
 
+	/* light passes */
 	if(is_light_pass(type)) {
-		RNG rng = cmj_hash(i, 0);
-		compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED),
-		                                     (type == SHADER_EVAL_AO),
-		                                     (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
-		                                      type == SHADER_EVAL_SUBSURFACE_INDIRECT));
+		compute_light_pass(kg, &sd, &L, rng,
+		                   (type == SHADER_EVAL_COMBINED),
+		                   (type == SHADER_EVAL_AO),
+		                   (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+		                    type == SHADER_EVAL_SUBSURFACE_INDIRECT),
+		                   sample);
 	}
 
 	switch (type) {
@@ -350,11 +389,15 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 	}
 
 	/* write output */
-	output[i] = make_float4(out.x, out.y, out.z, 1.0f);
-	return;
+	float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+
+	if(sample == 0)
+		output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+	else
+		output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
 }
 
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
 {
 	ShaderData sd;
 	uint4 in = input[i];
@@ -401,7 +444,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
 	}
 	
 	/* write output */
-	output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+	if(sample == 0)
+		output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+	else
+		output[i] += make_float4(out.x, out.y, out.z, 0.0f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 3b5faea2994..67bd0943b1b 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -64,12 +64,12 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
 
 /* Shader Evaluate */
 
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
 {
 	if(type >= SHADER_EVAL_BAKE)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 	else
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 3b18b164ffd..40d621b66f6 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -66,12 +66,12 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
 
 /* Shader Evaluate */
 
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
 {
 	if(type >= SHADER_EVAL_BAKE)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 	else
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index a3731d790f4..4b48d10b020 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
 
 /* Shader Evaluate */
 
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
 {
 	if(type >= SHADER_EVAL_BAKE)
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 	else
-		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
 }
 
 CCL_NAMESPACE_END
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2014-06-06 16:40:09 +0400
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2014-06-06 17:39:04 +0400
commit	e4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch)
tree	cc38ac39838bec84d28de396374ba022139a8aa2 /intern/cycles/kernel
parent	553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff)