Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 16:40:09 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2014-06-06 17:39:04 +0400
commite4e58d46128dc7fe4fb9b881d73b38173f00f5c3 (patch)
treecc38ac39838bec84d28de396374ba022139a8aa2 /intern/cycles/device
parent553264ff8e20484d0b91bb468f56aa1b7144f7aa (diff)
Fix T40370: cycles CUDA baking timeout with high number of AA samples.
Now baking does one AA sample at a time, just like final render. There is also some code for shader antialiasing that solves T40369 but it is disabled for now because there may be unpredictable side effects.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/device_cpu.cpp15
-rw-r--r--intern/cycles/device/device_cuda.cpp51
-rw-r--r--intern/cycles/device/device_opencl.cpp21
3 files changed, 51 insertions, 36 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index b0739dd20b4..71bf2d23d6e 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -393,7 +393,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -404,7 +405,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -415,7 +417,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -426,7 +429,8 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@@ -436,7 +440,8 @@ public:
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ for(int sample = 0; sample < task.num_samples; sample++)
+ kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
if(task.get_cancel() || task_pool.canceled())
break;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0429bfc6e97..0aa09ac5383 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -764,40 +764,45 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
- /* pass in parameters */
- int offset = 0;
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ /* pass in parameters */
+ int offset = 0;
- cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
- offset += sizeof(d_input);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+ offset += sizeof(d_input);
- cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
- offset += sizeof(d_output);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+ offset += sizeof(d_output);
- int shader_eval_type = task.shader_eval_type;
- offset = align_up(offset, __alignof(shader_eval_type));
+ int shader_eval_type = task.shader_eval_type;
+ offset = align_up(offset, __alignof(shader_eval_type));
- cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
- offset += sizeof(task.shader_eval_type);
+ cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+ offset += sizeof(task.shader_eval_type);
- cuda_assert(cuParamSeti(cuShader, offset, shader_x));
- offset += sizeof(shader_x);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+ offset += sizeof(shader_x);
- cuda_assert(cuParamSeti(cuShader, offset, shader_w));
- offset += sizeof(shader_w);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+ offset += sizeof(shader_w);
- cuda_assert(cuParamSetSize(cuShader, offset));
+ cuda_assert(cuParamSeti(cuShader, offset, sample));
+ offset += sizeof(sample);
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+ cuda_assert(cuParamSetSize(cuShader, offset));
- int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
- cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
- cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+ int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
- cuda_assert(cuCtxSynchronize());
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+ cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+ cuda_assert(cuCtxSynchronize());
+ }
}
cuda_pop_context();
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index f841daba124..abfe445414a 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1067,19 +1067,24 @@ public:
else
kernel = ckShaderKernel;
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
+ for(int sample = 0; sample < task.num_samples; sample++) {
+ cl_int d_sample = task.sample;
+
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &narg, #name);
+ set_kernel_arg_mem(kernel, &narg, #name);
#include "kernel_textures.h"
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
- opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample));
- enqueue_kernel(kernel, task.shader_w, 1);
+ enqueue_kernel(kernel, task.shader_w, 1);
+ }
}
void thread_run(DeviceTask *task)