diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-06-06 16:40:09 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-06-06 17:39:04 +0400 |
commit | e4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch) | |
tree | cc38ac39838bec84d28de396374ba022139a8aa2 | |
parent | 553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff) |
Fix T40370: cycles CUDA baking timeout with high number of AA samples.
Now baking does one AA sample at a time, just like final render. There is
also some code for shader antialiasing that solves T40369 but it is disabled
for now because there may be unpredictable side effects.
-rw-r--r-- | intern/cycles/blender/blender_session.cpp | 22 | ||||
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 15 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 51 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 21 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cl | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cu | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_avx.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 202 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse3.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse41.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/bake.cpp | 33 | ||||
-rw-r--r-- | intern/cycles/render/bake.h | 8 | ||||
-rw-r--r-- | intern/cycles/render/light.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/render/mesh_displace.cpp | 1 |
17 files changed, 245 insertions, 165 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 13d4041f6a7..0f31e55d60d 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -492,26 +492,6 @@ static void populate_bake_data(BakeData *data, BL::BakePixel pixel_array, const } } -static bool is_light_pass(ShaderEvalType type) -{ - switch (type) { - case SHADER_EVAL_AO: - case SHADER_EVAL_COMBINED: - case SHADER_EVAL_SHADOW: - case SHADER_EVAL_DIFFUSE_DIRECT: - case SHADER_EVAL_GLOSSY_DIRECT: - case SHADER_EVAL_TRANSMISSION_DIRECT: - case SHADER_EVAL_SUBSURFACE_DIRECT: - case SHADER_EVAL_DIFFUSE_INDIRECT: - case SHADER_EVAL_GLOSSY_INDIRECT: - case SHADER_EVAL_TRANSMISSION_INDIRECT: - case SHADER_EVAL_SUBSURFACE_INDIRECT: - return true; - default: - return false; - } -} - void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float result[]) { ShaderEvalType shader_type = get_shader_type(pass_type); @@ -529,7 +509,7 @@ void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::Bake Pass::add(PASS_UV, scene->film->passes); } - if(is_light_pass(shader_type)) { + if(BakeManager::is_light_pass(shader_type)) { /* force use_light_pass to be true */ Pass::add(PASS_LIGHT, scene->film->passes); } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b0739dd20b4..71bf2d23d6e 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -393,7 +393,8 @@ public: #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if(system_cpu_support_avx()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + for(int sample = 0; sample < task.num_samples; sample++) + kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -404,7 +405,8 @@ public: #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if(system_cpu_support_sse41()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + for(int sample = 0; sample < task.num_samples; sample++) + kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -415,7 +417,8 @@ public: #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if(system_cpu_support_sse3()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + for(int sample = 0; sample < task.num_samples; sample++) + kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -426,7 +429,8 @@ public: #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if(system_cpu_support_sse2()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + for(int sample = 0; sample < task.num_samples; sample++) + kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -436,7 +440,8 @@ public: #endif { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + for(int sample = 0; sample < task.num_samples; sample++) + kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); if(task.get_cancel() || task_pool.canceled()) break; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0429bfc6e97..0aa09ac5383 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -764,40 +764,45 @@ public: int shader_w = min(shader_chunk_size, end - shader_x); - /* pass in parameters */ - int offset = 0; + for(int sample = 0; sample < task.num_samples; sample++) { + /* pass in parameters */ + int offset = 0; - cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); - offset += sizeof(d_input); + cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); + offset += sizeof(d_input); - cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); - offset += sizeof(d_output); + cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); + offset += sizeof(d_output); - int shader_eval_type = task.shader_eval_type; - offset = align_up(offset, __alignof(shader_eval_type)); + int shader_eval_type = task.shader_eval_type; + offset = align_up(offset, __alignof(shader_eval_type)); - cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); - offset += sizeof(task.shader_eval_type); + cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); + offset += sizeof(task.shader_eval_type); - cuda_assert(cuParamSeti(cuShader, offset, shader_x)); - offset += sizeof(shader_x); + cuda_assert(cuParamSeti(cuShader, offset, shader_x)); + offset += sizeof(shader_x); - cuda_assert(cuParamSeti(cuShader, offset, shader_w)); - offset += sizeof(shader_w); + cuda_assert(cuParamSeti(cuShader, offset, shader_w)); + offset += sizeof(shader_w); - cuda_assert(cuParamSetSize(cuShader, offset)); + cuda_assert(cuParamSeti(cuShader, offset, sample)); + offset += sizeof(sample); - /* launch kernel */ - int threads_per_block; - cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); + cuda_assert(cuParamSetSize(cuShader, offset)); - int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); - cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); - cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); + int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; - cuda_assert(cuCtxSynchronize()); + cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); + cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); + + cuda_assert(cuCtxSynchronize()); + } } cuda_pop_context(); diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index f841daba124..abfe445414a 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -1067,19 +1067,24 @@ public: else kernel = ckShaderKernel; - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data)); - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input)); - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output)); + for(int sample = 0; sample < task.num_samples; sample++) { + cl_int d_sample = task.sample; + + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output)); #define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(kernel, &narg, #name); + set_kernel_arg_mem(kernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); - opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample)); - enqueue_kernel(kernel, task.shader_w, 1); + enqueue_kernel(kernel, task.shader_w, 1); + } } void thread_run(DeviceTask *task) diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index d7d3438036e..2e0a49435a8 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw) + int type, int sx, int sw, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -128,7 +128,7 @@ __kernel void kernel_ocl_shader( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); } __kernel void kernel_ocl_bake( @@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw) + int type, int sx, int sw, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); } diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index a535659b3b1..42eb9a62518 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -120,12 +120,12 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu /* Shader Evaluation */ -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 12273cc2853..83b1381a0f5 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -146,22 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x); + kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); } extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x); + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index c4a08646bab..b169b15b9b5 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i); + int type, int i, int sample); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, @@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i); + int type, int i, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 @@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i); + int type, int i, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i); + int type, int i, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX @@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i); + int type, int i, int sample); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index 7d354de16d2..f5e1b8a7bb7 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -67,12 +67,12 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float /* Shader Evaluate */ -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i) +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index e31090fe9ba..e8845e03acb 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -17,108 +17,114 @@ CCL_NAMESPACE_BEGIN ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng, - const bool is_combined, const bool is_ao, const bool is_sss) + const bool is_combined, const bool is_ao, const bool is_sss, int sample) { - int samples = kernel_data.integrator.aa_samples; - /* initialize master radiance accumulator */ kernel_assert(kernel_data.film.use_light_pass); path_radiance_init(L, kernel_data.film.use_light_pass); - /* take multiple samples */ - for(int sample = 0; sample < samples; sample++) { - PathRadiance L_sample; - PathState state; - Ray ray; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - bool is_sss_sample = is_sss; + PathRadiance L_sample; + PathState state; + Ray ray; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + bool is_sss_sample = is_sss; - /* init radiance */ - path_radiance_init(&L_sample, kernel_data.film.use_light_pass); + /* init radiance */ + path_radiance_init(&L_sample, kernel_data.film.use_light_pass); - /* init path state */ - path_state_init(kg, &state, &rng, sample); - state.num_samples = samples; + /* init path state */ + path_state_init(kg, &state, &rng, sample); + state.num_samples = kernel_data.integrator.aa_samples; - /* evaluate surface shader */ - float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); - shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); + /* evaluate surface shader */ + float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); + shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); - /* TODO, disable the closures we won't need */ + /* TODO, disable the closures we won't need */ #ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched) { - /* regular path tracer */ + if(!kernel_data.integrator.branched) { + /* regular path tracer */ #endif - /* sample ambient occlusion */ - if(is_combined || is_ao) { - kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput); - } + /* sample ambient occlusion */ + if(is_combined || is_ao) { + kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput); + } #ifdef __SUBSURFACE__ - /* sample subsurface scattering */ - if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput)) - is_sss_sample = true; - } + /* sample subsurface scattering */ + if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { + /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput)) + is_sss_sample = true; + } #endif - /* sample light and BSDF */ - if((!is_sss_sample) && (!is_ao)) { + /* sample light and BSDF */ + if((!is_sss_sample) && (!is_ao)) { - if(sd->flag & SD_EMISSION) { - float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce); - } + if(sd->flag & SD_EMISSION) { + float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); + path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce); + } - if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) { + if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) { #ifdef __LAMP_MIS__ - state.ray_t = 0.0f; + state.ray_t = 0.0f; #endif - /* compute indirect light */ - kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample); + /* compute indirect light */ + kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample); - /* sum and reset indirect light pass variables for the next samples */ - path_radiance_sum_indirect(&L_sample); - path_radiance_reset_indirect(&L_sample); - } + /* sum and reset indirect light pass variables for the next samples */ + path_radiance_sum_indirect(&L_sample); + path_radiance_reset_indirect(&L_sample); } -#ifdef __BRANCHED_PATH__ } - else { - /* branched path tracer */ +#ifdef __BRANCHED_PATH__ + } + else { + /* branched path tracer */ - /* sample ambient occlusion */ - if(is_combined || is_ao) { - kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput); - } + /* sample ambient occlusion */ + if(is_combined || is_ao) { + kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput); + } #ifdef __SUBSURFACE__ - /* sample subsurface scattering */ - if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput); - } + /* sample subsurface scattering */ + if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { + /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, throughput); + } #endif - /* sample light and BSDF */ - if((!is_sss_sample) && (!is_ao)) { - - if(sd->flag & SD_EMISSION) { - float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce); - } + /* sample light and BSDF */ + if((!is_sss_sample) && (!is_ao)) { - kernel_branched_path_integrate_lighting(kg, &rng, - sd, throughput, 1.0f, &state, &L_sample); + if(sd->flag & SD_EMISSION) { + float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); + path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce); } + + kernel_branched_path_integrate_lighting(kg, &rng, + sd, throughput, 1.0f, &state, &L_sample); } + } #endif - /* accumulate into master L */ - path_radiance_accum_sample(L, &L_sample, samples); + /* accumulate into master L */ + path_radiance_accum_sample(L, &L_sample, 1); +} + +ccl_device bool is_aa_pass(ShaderEvalType type) +{ + switch(type) { + case SHADER_EVAL_UV: + case SHADER_EVAL_NORMAL: + return false; + default: + return true; } } @@ -142,7 +148,20 @@ ccl_device bool is_light_pass(ShaderEvalType type) } } -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i) +#if 0 +ccl_device_inline float bake_clamp_mirror_repeat(float u) +{ + /* use mirror repeat (like opengl texture) so that if the barycentric + * coordinate goes past the end of the triangle it is not always clamped + * to the same value, gives ugly patterns */ + float fu = floorf(u); + u = u - fu; + + return (((int)fu) & 1)? 1.0f - u: u; +} +#endif + +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) { ShaderData sd; uint4 in = input[i * 2]; @@ -164,6 +183,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, float dvdx = __uint_as_float(diff.z); float dvdy = __uint_as_float(diff.w); + int num_samples = kernel_data.integrator.aa_samples; + + /* random number generator */ + RNG rng = cmj_hash(i, 0); + +#if 0 + uint rng_state = cmj_hash(i, 0); + float filter_x, filter_y; + path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y); + + /* subpixel u/v offset */ + if(sample > 0) { + u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f)); + v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f)); + } +#endif + + /* triangle */ int shader; float3 P, Ng; @@ -190,12 +227,14 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, sd.dv.dx = dvdx; sd.dv.dy = dvdy; + /* light passes */ if(is_light_pass(type)) { - RNG rng = cmj_hash(i, 0); - compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED), - (type == SHADER_EVAL_AO), - (type == SHADER_EVAL_SUBSURFACE_DIRECT || - type == SHADER_EVAL_SUBSURFACE_INDIRECT)); + compute_light_pass(kg, &sd, &L, rng, + (type == SHADER_EVAL_COMBINED), + (type == SHADER_EVAL_AO), + (type == SHADER_EVAL_SUBSURFACE_DIRECT || + type == SHADER_EVAL_SUBSURFACE_INDIRECT), + sample); } switch (type) { @@ -350,11 +389,15 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, } /* write output */ - output[i] = make_float4(out.x, out.y, out.z, 1.0f); - return; + float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f; + + if(sample == 0) + output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac; + else + output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac; } -ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i) +ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) { ShaderData sd; uint4 in = input[i]; @@ -401,7 +444,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu } /* write output */ - output[i] = make_float4(out.x, out.y, out.z, 0.0f); + if(sample == 0) + output[i] = make_float4(out.x, out.y, out.z, 0.0f); + else + output[i] += make_float4(out.x, out.y, out.z, 0.0f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 3b5faea2994..67bd0943b1b 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -64,12 +64,12 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i) +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 3b18b164ffd..40d621b66f6 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -66,12 +66,12 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i) +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index a3731d790f4..4b48d10b020 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -67,12 +67,12 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo /* Shader Evaluate */ -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i) +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } CCL_NAMESPACE_END diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index aa317ab672f..c68f6e1f08e 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -15,6 +15,7 @@ */ #include "bake.h" +#include "integrator.h" CCL_NAMESPACE_BEGIN @@ -152,6 +153,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre task.shader_eval_type = shader_type; task.shader_x = 0; task.shader_w = d_output.size(); + task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; task.get_cancel = function_bind(&Progress::get_cancel, &progress); device->task_add(task); @@ -203,4 +205,35 @@ void BakeManager::device_free(Device *device, DeviceScene *dscene) { } +bool BakeManager::is_aa_pass(ShaderEvalType type) +{ + switch(type) { + case SHADER_EVAL_UV: + case SHADER_EVAL_NORMAL: + return false; + default: + return true; + } +} + +bool BakeManager::is_light_pass(ShaderEvalType type) +{ + switch(type) { + case SHADER_EVAL_AO: + case SHADER_EVAL_COMBINED: + case SHADER_EVAL_SHADOW: + case SHADER_EVAL_DIFFUSE_DIRECT: + case SHADER_EVAL_GLOSSY_DIRECT: + case SHADER_EVAL_TRANSMISSION_DIRECT: + case SHADER_EVAL_SUBSURFACE_DIRECT: + case SHADER_EVAL_DIFFUSE_INDIRECT: + case SHADER_EVAL_GLOSSY_INDIRECT: + case SHADER_EVAL_TRANSMISSION_INDIRECT: + case SHADER_EVAL_SUBSURFACE_INDIRECT: + return true; + default: + return false; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index ea403f7d39a..f91ba589b8b 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -17,10 +17,11 @@ #ifndef __BAKE_H__ #define __BAKE_H__ -#include "util_vector.h" #include "device.h" #include "scene.h" -#include "session.h" + +#include "util_progress.h" +#include "util_vector.h" CCL_NAMESPACE_BEGIN @@ -64,6 +65,9 @@ public: void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); void device_free(Device *device, DeviceScene *dscene); + static bool is_light_pass(ShaderEvalType type); + static bool is_aa_pass(ShaderEvalType type); + bool need_update; private: diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 1325627ef05..9a0a7ead696 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -66,6 +66,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; main_task.shader_x = 0; main_task.shader_w = width*height; + main_task.num_samples = 1; main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index 661fd9c66c1..4c0ee76299c 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -119,6 +119,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me task.shader_eval_type = SHADER_EVAL_DISPLACE; task.shader_x = 0; task.shader_w = d_output.size(); + task.num_samples = 1; task.get_cancel = function_bind(&Progress::get_cancel, &progress); device->task_add(task); |