Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 16:40:09 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 17:39:04 +0400
commite4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch)
treecc38ac39838bec84d28de396374ba022139a8aa2
parent553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff)
Fix T40370: cycles CUDA baking timeout with high number of AA samples.
Now baking does one AA sample at a time, just like final render. There is also some code for shader antialiasing that solves T40369 but it is disabled for now because there may be unpredictable side effects.
-rw-r--r--intern/cycles/blender/blender_session.cpp22
-rw-r--r--intern/cycles/device/device_cpu.cpp15
-rw-r--r--intern/cycles/device/device_cuda.cpp51
-rw-r--r--intern/cycles/device/device_opencl.cpp21
-rw-r--r--intern/cycles/kernel/kernel.cl8
-rw-r--r--intern/cycles/kernel/kernel.cpp6
-rw-r--r--intern/cycles/kernel/kernel.cu8
-rw-r--r--intern/cycles/kernel/kernel.h10
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp6
-rw-r--r--intern/cycles/kernel/kernel_bake.h202
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp6
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp6
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp6
-rw-r--r--intern/cycles/render/bake.cpp33
-rw-r--r--intern/cycles/render/bake.h8
-rw-r--r--intern/cycles/render/light.cpp1
-rw-r--r--intern/cycles/render/mesh_displace.cpp1
17 files changed, 245 insertions, 165 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 13d4041f6a7..0f31e55d60d 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -492,26 +492,6 @@ static void populate_bake_data(BakeData *data, BL::BakePixel pixel_array, const
}
}
-static bool is_light_pass(ShaderEvalType type)
-{
- switch (type) {
- case SHADER_EVAL_AO:
- case SHADER_EVAL_COMBINED:
- case SHADER_EVAL_SHADOW:
- case SHADER_EVAL_DIFFUSE_DIRECT:
- case SHADER_EVAL_GLOSSY_DIRECT:
- case SHADER_EVAL_TRANSMISSION_DIRECT:
- case SHADER_EVAL_SUBSURFACE_DIRECT:
- case SHADER_EVAL_DIFFUSE_INDIRECT:
- case SHADER_EVAL_GLOSSY_INDIRECT:
- case SHADER_EVAL_TRANSMISSION_INDIRECT:
- case SHADER_EVAL_SUBSURFACE_INDIRECT:
- return true;
- default:
- return false;
- }
-}
-
void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float result[])
{
ShaderEvalType shader_type = get_shader_type(pass_type);
@@ -529,7 +509,7 @@ void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::Bake
Pass::add(PASS_UV, scene->film->passes);
}
- if(is_light_pass(shader_type)) {
+ if(BakeManager::is_light_pass(shader_type)) {
/* force use_light_pass to be true */
Pass::add(PASS_LIGHT, scene->film->passes);
}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index b0739dd20b4..71bf2d23d6e 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -393,7 +393,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -404,7 +405,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -415,7 +417,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -426,7 +429,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -436,7 +440,8 @@ public:
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0429bfc6e97..0aa09ac5383 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -764,40 +764,45 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
- /* pass in parameters */
- int offset = 0;
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ /* pass in parameters */
+ int offset = 0;
- cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
- offset += sizeof(d_input);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+ offset += sizeof(d_input);
- cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
- offset += sizeof(d_output);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+ offset += sizeof(d_output);
- int shader_eval_type = task.shader_eval_type;
- offset = align_up(offset, __alignof(shader_eval_type));
+ int shader_eval_type = task.shader_eval_type;
+ offset = align_up(offset, __alignof(shader_eval_type));
- cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
- offset += sizeof(task.shader_eval_type);
+ cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+ offset += sizeof(task.shader_eval_type);
- cuda_assert(cuParamSeti(cuShader, offset, shader_x));
- offset += sizeof(shader_x);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+ offset += sizeof(shader_x);
- cuda_assert(cuParamSeti(cuShader, offset, shader_w));
- offset += sizeof(shader_w);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+ offset += sizeof(shader_w);
- cuda_assert(cuParamSetSize(cuShader, offset));
+ cuda_assert(cuParamSeti(cuShader, offset, sample));
+ offset += sizeof(sample);
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+ cuda_assert(cuParamSetSize(cuShader, offset));
- int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
- cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
- cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+ int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
- cuda_assert(cuCtxSynchronize());
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+ cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+ cuda_assert(cuCtxSynchronize());
+ }
}
cuda_pop_context();
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index f841daba124..abfe445414a 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1067,19 +1067,24 @@ public:
else
kernel = ckShaderKernel;
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ cl_int d_sample = task.sample;
+
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &narg, #name);
+ set_kernel_arg_mem(kernel, &narg, #name);
#include "kernel_textures.h"
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample));
- enqueue_kernel(kernel, task.shader_w, 1);
+ enqueue_kernel(kernel, task.shader_w, 1);
+ }
}
void thread_run(DeviceTask *task)
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index d7d3438036e..2e0a49435a8 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw)
+ int type, int sx, int sw, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -128,7 +128,7 @@ __kernel void kernel_ocl_shader(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
}
__kernel void kernel_ocl_bake(
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw)
+ int type, int sx, int sw, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
}
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index a535659b3b1..42eb9a62518 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -120,12 +120,12 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
/* Shader Evaluation */
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 12273cc2853..83b1381a0f5 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -146,22 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
- kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
- kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
}
#endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index c4a08646bab..b169b15b9b5 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index 7d354de16d2..f5e1b8a7bb7 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
/* Shader Evaluate */
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index e31090fe9ba..e8845e03acb 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -17,108 +17,114 @@
CCL_NAMESPACE_BEGIN
ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
- const bool is_combined, const bool is_ao, const bool is_sss)
+ const bool is_combined, const bool is_ao, const bool is_sss, int sample)
{
- int samples = kernel_data.integrator.aa_samples;
-
/* initialize master radiance accumulator */
kernel_assert(kernel_data.film.use_light_pass);
path_radiance_init(L, kernel_data.film.use_light_pass);
- /* take multiple samples */
- for(int sample = 0; sample < samples; sample++) {
- PathRadiance L_sample;
- PathState state;
- Ray ray;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- bool is_sss_sample = is_sss;
+ PathRadiance L_sample;
+ PathState state;
+ Ray ray;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ bool is_sss_sample = is_sss;
- /* init radiance */
- path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+ /* init radiance */
+ path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
- /* init path state */
- path_state_init(kg, &state, &rng, sample);
- state.num_samples = samples;
+ /* init path state */
+ path_state_init(kg, &state, &rng, sample);
+ state.num_samples = kernel_data.integrator.aa_samples;
- /* evaluate surface shader */
- float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
- shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+ /* evaluate surface shader */
+ float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
+ shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
- /* TODO, disable the closures we won't need */
+ /* TODO, disable the closures we won't need */
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched) {
- /* regular path tracer */
+ if(!kernel_data.integrator.branched) {
+ /* regular path tracer */
#endif
- /* sample ambient occlusion */
- if(is_combined || is_ao) {
- kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
- is_sss_sample = true;
- }
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+ is_sss_sample = true;
+ }
#endif
- /* sample light and BSDF */
- if((!is_sss_sample) && (!is_ao)) {
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
- if(sd->flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
- }
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
+ }
- if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
+ if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
- state.ray_t = 0.0f;
+ state.ray_t = 0.0f;
#endif
- /* compute indirect light */
- kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample);
+ /* compute indirect light */
+ kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
- /* sum and reset indirect light pass variables for the next samples */
- path_radiance_sum_indirect(&L_sample);
- path_radiance_reset_indirect(&L_sample);
- }
+ /* sum and reset indirect light pass variables for the next samples */
+ path_radiance_sum_indirect(&L_sample);
+ path_radiance_reset_indirect(&L_sample);
}
-#ifdef __BRANCHED_PATH__
}
- else {
- /* branched path tracer */
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ /* branched path tracer */
- /* sample ambient occlusion */
- if(is_combined || is_ao) {
- kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#endif
- /* sample light and BSDF */
- if((!is_sss_sample) && (!is_ao)) {
-
- if(sd->flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
- }
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
- kernel_branched_path_integrate_lighting(kg, &rng,
- sd, throughput, 1.0f, &state, &L_sample);
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
}
+
+ kernel_branched_path_integrate_lighting(kg, &rng,
+ sd, throughput, 1.0f, &state, &L_sample);
}
+ }
#endif
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample, samples);
+ /* accumulate into master L */
+ path_radiance_accum_sample(L, &L_sample, 1);
+}
+
+ccl_device bool is_aa_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_UV:
+ case SHADER_EVAL_NORMAL:
+ return false;
+ default:
+ return true;
}
}
@@ -142,7 +148,20 @@ ccl_device bool is_light_pass(ShaderEvalType type)
}
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+#if 0
+ccl_device_inline float bake_clamp_mirror_repeat(float u)
+{
+ /* use mirror repeat (like opengl texture) so that if the barycentric
+ * coordinate goes past the end of the triangle it is not always clamped
+ * to the same value, gives ugly patterns */
+ float fu = floorf(u);
+ u = u - fu;
+
+ return (((int)fu) & 1)? 1.0f - u: u;
+}
+#endif
+
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
{
ShaderData sd;
uint4 in = input[i * 2];
@@ -164,6 +183,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
float dvdx = __uint_as_float(diff.z);
float dvdy = __uint_as_float(diff.w);
+ int num_samples = kernel_data.integrator.aa_samples;
+
+ /* random number generator */
+ RNG rng = cmj_hash(i, 0);
+
+#if 0
+ uint rng_state = cmj_hash(i, 0);
+ float filter_x, filter_y;
+ path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y);
+
+ /* subpixel u/v offset */
+ if(sample > 0) {
+ u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f));
+ v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f));
+ }
+#endif
+
+ /* triangle */
int shader;
float3 P, Ng;
@@ -190,12 +227,14 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
+ /* light passes */
if(is_light_pass(type)) {
- RNG rng = cmj_hash(i, 0);
- compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED),
- (type == SHADER_EVAL_AO),
- (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
- type == SHADER_EVAL_SUBSURFACE_INDIRECT));
+ compute_light_pass(kg, &sd, &L, rng,
+ (type == SHADER_EVAL_COMBINED),
+ (type == SHADER_EVAL_AO),
+ (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+ type == SHADER_EVAL_SUBSURFACE_INDIRECT),
+ sample);
}
switch (type) {
@@ -350,11 +389,15 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 1.0f);
- return;
+ float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
}
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
{
ShaderData sd;
uint4 in = input[i];
@@ -401,7 +444,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 0.0f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 3b5faea2994..67bd0943b1b 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -64,12 +64,12 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 3b18b164ffd..40d621b66f6 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -66,12 +66,12 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index a3731d790f4..4b48d10b020 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
/* Shader Evaluate */
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index aa317ab672f..c68f6e1f08e 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -15,6 +15,7 @@
*/
#include "bake.h"
+#include "integrator.h"
CCL_NAMESPACE_BEGIN
@@ -152,6 +153,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
task.shader_eval_type = shader_type;
task.shader_x = 0;
task.shader_w = d_output.size();
+ task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
device->task_add(task);
@@ -203,4 +205,35 @@ void BakeManager::device_free(Device *device, DeviceScene *dscene)
{
}
+bool BakeManager::is_aa_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_UV:
+ case SHADER_EVAL_NORMAL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool BakeManager::is_light_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_AO:
+ case SHADER_EVAL_COMBINED:
+ case SHADER_EVAL_SHADOW:
+ case SHADER_EVAL_DIFFUSE_DIRECT:
+ case SHADER_EVAL_GLOSSY_DIRECT:
+ case SHADER_EVAL_TRANSMISSION_DIRECT:
+ case SHADER_EVAL_SUBSURFACE_DIRECT:
+ case SHADER_EVAL_DIFFUSE_INDIRECT:
+ case SHADER_EVAL_GLOSSY_INDIRECT:
+ case SHADER_EVAL_TRANSMISSION_INDIRECT:
+ case SHADER_EVAL_SUBSURFACE_INDIRECT:
+ return true;
+ default:
+ return false;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index ea403f7d39a..f91ba589b8b 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -17,10 +17,11 @@
#ifndef __BAKE_H__
#define __BAKE_H__
-#include "util_vector.h"
#include "device.h"
#include "scene.h"
-#include "session.h"
+
+#include "util_progress.h"
+#include "util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -64,6 +65,9 @@ public:
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_free(Device *device, DeviceScene *dscene);
+ static bool is_light_pass(ShaderEvalType type);
+ static bool is_aa_pass(ShaderEvalType type);
+
bool need_update;
private:
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 1325627ef05..9a0a7ead696 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -66,6 +66,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
main_task.shader_eval_type = SHADER_EVAL_BACKGROUND;
main_task.shader_x = 0;
main_task.shader_w = width*height;
+ main_task.num_samples = 1;
main_task.get_cancel = function_bind(&Progress::get_cancel, &progress);
/* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 661fd9c66c1..4c0ee76299c 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -119,6 +119,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
task.shader_eval_type = SHADER_EVAL_DISPLACE;
task.shader_x = 0;
task.shader_w = d_output.size();
+ task.num_samples = 1;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
device->task_add(task);