Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 16:40:09 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 17:39:04 +0400
commite4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch)
treecc38ac39838bec84d28de396374ba022139a8aa2 /intern
parent553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff)
Fix T40370: cycles CUDA baking timeout with high number of AA samples.
Now baking does one AA sample at a time, just like final render. There is also some code for shader antialiasing that solves T40369 but it is disabled for now because there may be unpredictable side effects.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/blender_session.cpp22
-rw-r--r--intern/cycles/device/device_cpu.cpp15
-rw-r--r--intern/cycles/device/device_cuda.cpp51
-rw-r--r--intern/cycles/device/device_opencl.cpp21
-rw-r--r--intern/cycles/kernel/kernel.cl8
-rw-r--r--intern/cycles/kernel/kernel.cpp6
-rw-r--r--intern/cycles/kernel/kernel.cu8
-rw-r--r--intern/cycles/kernel/kernel.h10
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp6
-rw-r--r--intern/cycles/kernel/kernel_bake.h202
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp6
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp6
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp6
-rw-r--r--intern/cycles/render/bake.cpp33
-rw-r--r--intern/cycles/render/bake.h8
-rw-r--r--intern/cycles/render/light.cpp1
-rw-r--r--intern/cycles/render/mesh_displace.cpp1
17 files changed, 245 insertions, 165 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 13d4041f6a7..0f31e55d60d 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -492,26 +492,6 @@ static void populate_bake_data(BakeData *data, BL::BakePixel pixel_array, const
}
}
-static bool is_light_pass(ShaderEvalType type)
-{
- switch (type) {
- case SHADER_EVAL_AO:
- case SHADER_EVAL_COMBINED:
- case SHADER_EVAL_SHADOW:
- case SHADER_EVAL_DIFFUSE_DIRECT:
- case SHADER_EVAL_GLOSSY_DIRECT:
- case SHADER_EVAL_TRANSMISSION_DIRECT:
- case SHADER_EVAL_SUBSURFACE_DIRECT:
- case SHADER_EVAL_DIFFUSE_INDIRECT:
- case SHADER_EVAL_GLOSSY_INDIRECT:
- case SHADER_EVAL_TRANSMISSION_INDIRECT:
- case SHADER_EVAL_SUBSURFACE_INDIRECT:
- return true;
- default:
- return false;
- }
-}
-
void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float result[])
{
ShaderEvalType shader_type = get_shader_type(pass_type);
@@ -529,7 +509,7 @@ void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::Bake
Pass::add(PASS_UV, scene->film->passes);
}
- if(is_light_pass(shader_type)) {
+ if(BakeManager::is_light_pass(shader_type)) {
/* force use_light_pass to be true */
Pass::add(PASS_LIGHT, scene->film->passes);
}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index b0739dd20b4..71bf2d23d6e 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -393,7 +393,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -404,7 +405,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -415,7 +417,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -426,7 +429,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -436,7 +440,8 @@ public:
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0429bfc6e97..0aa09ac5383 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -764,40 +764,45 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
- /* pass in parameters */
- int offset = 0;
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ /* pass in parameters */
+ int offset = 0;
- cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
- offset += sizeof(d_input);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+ offset += sizeof(d_input);
- cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
- offset += sizeof(d_output);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+ offset += sizeof(d_output);
- int shader_eval_type = task.shader_eval_type;
- offset = align_up(offset, __alignof(shader_eval_type));
+ int shader_eval_type = task.shader_eval_type;
+ offset = align_up(offset, __alignof(shader_eval_type));
- cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
- offset += sizeof(task.shader_eval_type);
+ cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+ offset += sizeof(task.shader_eval_type);
- cuda_assert(cuParamSeti(cuShader, offset, shader_x));
- offset += sizeof(shader_x);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+ offset += sizeof(shader_x);
- cuda_assert(cuParamSeti(cuShader, offset, shader_w));
- offset += sizeof(shader_w);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+ offset += sizeof(shader_w);
- cuda_assert(cuParamSetSize(cuShader, offset));
+ cuda_assert(cuParamSeti(cuShader, offset, sample));
+ offset += sizeof(sample);
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+ cuda_assert(cuParamSetSize(cuShader, offset));
- int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
- cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
- cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+ int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
- cuda_assert(cuCtxSynchronize());
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+ cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+ cuda_assert(cuCtxSynchronize());
+ }
}
cuda_pop_context();
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index f841daba124..abfe445414a 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1067,19 +1067,24 @@ public:
else
kernel = ckShaderKernel;
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ cl_int d_sample = task.sample;
+
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &narg, #name);
+ set_kernel_arg_mem(kernel, &narg, #name);
#include "kernel_textures.h"
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample));
- enqueue_kernel(kernel, task.shader_w, 1);
+ enqueue_kernel(kernel, task.shader_w, 1);
+ }
}
void thread_run(DeviceTask *task)
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index d7d3438036e..2e0a49435a8 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw)
+ int type, int sx, int sw, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -128,7 +128,7 @@ __kernel void kernel_ocl_shader(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
}
__kernel void kernel_ocl_bake(
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw)
+ int type, int sx, int sw, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
}
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index a535659b3b1..42eb9a62518 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -120,12 +120,12 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
/* Shader Evaluation */
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 12273cc2853..83b1381a0f5 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -146,22 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
- kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
- kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
}
#endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index c4a08646bab..b169b15b9b5 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int sample);
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index 7d354de16d2..f5e1b8a7bb7 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
/* Shader Evaluate */
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index e31090fe9ba..e8845e03acb 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -17,108 +17,114 @@
CCL_NAMESPACE_BEGIN
ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
- const bool is_combined, const bool is_ao, const bool is_sss)
+ const bool is_combined, const bool is_ao, const bool is_sss, int sample)
{
- int samples = kernel_data.integrator.aa_samples;
-
/* initialize master radiance accumulator */
kernel_assert(kernel_data.film.use_light_pass);
path_radiance_init(L, kernel_data.film.use_light_pass);
- /* take multiple samples */
- for(int sample = 0; sample < samples; sample++) {
- PathRadiance L_sample;
- PathState state;
- Ray ray;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- bool is_sss_sample = is_sss;
+ PathRadiance L_sample;
+ PathState state;
+ Ray ray;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ bool is_sss_sample = is_sss;
- /* init radiance */
- path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+ /* init radiance */
+ path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
- /* init path state */
- path_state_init(kg, &state, &rng, sample);
- state.num_samples = samples;
+ /* init path state */
+ path_state_init(kg, &state, &rng, sample);
+ state.num_samples = kernel_data.integrator.aa_samples;
- /* evaluate surface shader */
- float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
- shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+ /* evaluate surface shader */
+ float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
+ shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
- /* TODO, disable the closures we won't need */
+ /* TODO, disable the closures we won't need */
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched) {
- /* regular path tracer */
+ if(!kernel_data.integrator.branched) {
+ /* regular path tracer */
#endif
- /* sample ambient occlusion */
- if(is_combined || is_ao) {
- kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
- is_sss_sample = true;
- }
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+ is_sss_sample = true;
+ }
#endif
- /* sample light and BSDF */
- if((!is_sss_sample) && (!is_ao)) {
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
- if(sd->flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
- }
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
+ }
- if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
+ if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
- state.ray_t = 0.0f;
+ state.ray_t = 0.0f;
#endif
- /* compute indirect light */
- kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample);
+ /* compute indirect light */
+ kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
- /* sum and reset indirect light pass variables for the next samples */
- path_radiance_sum_indirect(&L_sample);
- path_radiance_reset_indirect(&L_sample);
- }
+ /* sum and reset indirect light pass variables for the next samples */
+ path_radiance_sum_indirect(&L_sample);
+ path_radiance_reset_indirect(&L_sample);
}
-#ifdef __BRANCHED_PATH__
}
- else {
- /* branched path tracer */
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ /* branched path tracer */
- /* sample ambient occlusion */
- if(is_combined || is_ao) {
- kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
- }
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput);
+ }
#endif
- /* sample light and BSDF */
- if((!is_sss_sample) && (!is_ao)) {
-
- if(sd->flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
- }
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
- kernel_branched_path_integrate_lighting(kg, &rng,
- sd, throughput, 1.0f, &state, &L_sample);
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
}
+
+ kernel_branched_path_integrate_lighting(kg, &rng,
+ sd, throughput, 1.0f, &state, &L_sample);
}
+ }
#endif
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample, samples);
+ /* accumulate into master L */
+ path_radiance_accum_sample(L, &L_sample, 1);
+}
+
+ccl_device bool is_aa_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_UV:
+ case SHADER_EVAL_NORMAL:
+ return false;
+ default:
+ return true;
}
}
@@ -142,7 +148,20 @@ ccl_device bool is_light_pass(ShaderEvalType type)
}
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+#if 0
+ccl_device_inline float bake_clamp_mirror_repeat(float u)
+{
+ /* use mirror repeat (like opengl texture) so that if the barycentric
+ * coordinate goes past the end of the triangle it is not always clamped
+ * to the same value, gives ugly patterns */
+ float fu = floorf(u);
+ u = u - fu;
+
+ return (((int)fu) & 1)? 1.0f - u: u;
+}
+#endif
+
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
{
ShaderData sd;
uint4 in = input[i * 2];
@@ -164,6 +183,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
float dvdx = __uint_as_float(diff.z);
float dvdy = __uint_as_float(diff.w);
+ int num_samples = kernel_data.integrator.aa_samples;
+
+ /* random number generator */
+ RNG rng = cmj_hash(i, 0);
+
+#if 0
+ uint rng_state = cmj_hash(i, 0);
+ float filter_x, filter_y;
+ path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y);
+
+ /* subpixel u/v offset */
+ if(sample > 0) {
+ u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f));
+ v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f));
+ }
+#endif
+
+ /* triangle */
int shader;
float3 P, Ng;
@@ -190,12 +227,14 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
+ /* light passes */
if(is_light_pass(type)) {
- RNG rng = cmj_hash(i, 0);
- compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED),
- (type == SHADER_EVAL_AO),
- (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
- type == SHADER_EVAL_SUBSURFACE_INDIRECT));
+ compute_light_pass(kg, &sd, &L, rng,
+ (type == SHADER_EVAL_COMBINED),
+ (type == SHADER_EVAL_AO),
+ (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+ type == SHADER_EVAL_SUBSURFACE_INDIRECT),
+ sample);
}
switch (type) {
@@ -350,11 +389,15 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 1.0f);
- return;
+ float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
}
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
{
ShaderData sd;
uint4 in = input[i];
@@ -401,7 +444,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 0.0f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 3b5faea2994..67bd0943b1b 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -64,12 +64,12 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 3b18b164ffd..40d621b66f6 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -66,12 +66,12 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index a3731d790f4..4b48d10b020 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -67,12 +67,12 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
/* Shader Evaluate */
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index aa317ab672f..c68f6e1f08e 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -15,6 +15,7 @@
*/
#include "bake.h"
+#include "integrator.h"
CCL_NAMESPACE_BEGIN
@@ -152,6 +153,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
task.shader_eval_type = shader_type;
task.shader_x = 0;
task.shader_w = d_output.size();
+ task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
device->task_add(task);
@@ -203,4 +205,35 @@ void BakeManager::device_free(Device *device, DeviceScene *dscene)
{
}
+bool BakeManager::is_aa_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_UV:
+ case SHADER_EVAL_NORMAL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool BakeManager::is_light_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_AO:
+ case SHADER_EVAL_COMBINED:
+ case SHADER_EVAL_SHADOW:
+ case SHADER_EVAL_DIFFUSE_DIRECT:
+ case SHADER_EVAL_GLOSSY_DIRECT:
+ case SHADER_EVAL_TRANSMISSION_DIRECT:
+ case SHADER_EVAL_SUBSURFACE_DIRECT:
+ case SHADER_EVAL_DIFFUSE_INDIRECT:
+ case SHADER_EVAL_GLOSSY_INDIRECT:
+ case SHADER_EVAL_TRANSMISSION_INDIRECT:
+ case SHADER_EVAL_SUBSURFACE_INDIRECT:
+ return true;
+ default:
+ return false;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index ea403f7d39a..f91ba589b8b 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -17,10 +17,11 @@
#ifndef __BAKE_H__
#define __BAKE_H__
-#include "util_vector.h"
#include "device.h"
#include "scene.h"
-#include "session.h"
+
+#include "util_progress.h"
+#include "util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -64,6 +65,9 @@ public:
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_free(Device *device, DeviceScene *dscene);
+ static bool is_light_pass(ShaderEvalType type);
+ static bool is_aa_pass(ShaderEvalType type);
+
bool need_update;
private:
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 1325627ef05..9a0a7ead696 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -66,6 +66,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
main_task.shader_eval_type = SHADER_EVAL_BACKGROUND;
main_task.shader_x = 0;
main_task.shader_w = width*height;
+ main_task.num_samples = 1;
main_task.get_cancel = function_bind(&Progress::get_cancel, &progress);
/* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 661fd9c66c1..4c0ee76299c 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -119,6 +119,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
task.shader_eval_type = SHADER_EVAL_DISPLACE;
task.shader_x = 0;
task.shader_w = d_output.size();
+ task.num_samples = 1;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
device->task_add(task);