Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/device_cpu.cpp23
-rw-r--r--intern/cycles/device/device_cuda.cpp21
-rw-r--r--intern/cycles/device/device_denoising.cpp59
-rw-r--r--intern/cycles/device/device_denoising.h7
-rw-r--r--intern/cycles/device/device_task.h2
-rw-r--r--intern/cycles/device/opencl/opencl.h1
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp31
7 files changed, 117 insertions, 27 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 6668acc9cbe..93c63b92a55 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -186,15 +186,15 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, int, float, float)> filter_nlm_calc_difference_kernel;
KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
- KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
- KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
+ KernelFunctions<void(*)(float*, TileInfo*, int, int, int, float*, int*, int*, int, int, bool, int, float)> filter_construct_transform_kernel;
+ KernelFunctions<void(*)(int, int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int, bool)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
int, int, int, int, int, int, int, int, ccl_global int*, int,
@@ -512,7 +512,7 @@ public:
difference,
local_rect,
w, channel_offset,
- a, k_2);
+ 0, a, k_2);
filter_nlm_blur_kernel() (difference, blurDifference, local_rect, w, f);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
@@ -542,6 +542,7 @@ public:
for(int y = 0; y < task->filter_area.w; y++) {
for(int x = 0; x < task->filter_area.z; x++) {
filter_construct_transform_kernel()((float*) task->buffer.mem.device_pointer,
+ task->tile_info,
x + task->filter_area.x,
y + task->filter_area.y,
y*task->filter_area.z + x,
@@ -549,6 +550,8 @@ public:
(int*) task->storage.rank.device_pointer,
&task->rect.x,
task->buffer.pass_stride,
+ task->buffer.frame_stride,
+ task->buffer.use_time,
task->radius,
task->pca_threshold);
}
@@ -559,6 +562,7 @@ public:
bool denoising_accumulate(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
+ int frame,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT);
@@ -568,6 +572,7 @@ public:
float *blurDifference = temporary_mem + task->buffer.pass_stride;
int r = task->radius;
+ int frame_offset = frame * task->buffer.frame_stride;
for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
int dy = i / (2*r+1) - r;
int dx = i % (2*r+1) - r;
@@ -583,12 +588,14 @@ public:
local_rect,
task->buffer.stride,
task->buffer.pass_stride,
+ frame_offset,
1.0f,
task->nlm_k_2);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, task->buffer.stride, 4);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
filter_nlm_construct_gramian_kernel()(dx, dy,
+ task->tile_info->frames[frame],
blurDifference,
(float*) task->buffer.mem.device_pointer,
(float*) task->storage.transform.device_pointer,
@@ -599,7 +606,9 @@ public:
&task->reconstruction_state.filter_window.x,
task->buffer.stride,
4,
- task->buffer.pass_stride);
+ task->buffer.pass_stride,
+ frame_offset,
+ task->buffer.use_time);
}
return true;
@@ -787,7 +796,7 @@ public:
tile.sample = tile.start_sample + tile.num_samples;
denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
- denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, &denoising);
+ denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index cb7d8bbb224..e21d974ebbe 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1301,6 +1301,7 @@ public:
int pass_stride = task->buffer.pass_stride;
int num_shifts = (2*r+1)*(2*r+1);
int channel_offset = task->nlm_state.is_color? task->buffer.pass_stride : 0;
+ int frame_offset = 0;
if(have_error())
return false;
@@ -1327,7 +1328,7 @@ public:
CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, num_shifts);
- void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2};
+ void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &frame_offset, &a, &k_2};
void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &channel_offset, &r, &f};
@@ -1367,13 +1368,16 @@ public:
task->storage.h);
void *args[] = {&task->buffer.mem.device_pointer,
+ &task->tile_info_mem.device_pointer,
&task->storage.transform.device_pointer,
&task->storage.rank.device_pointer,
&task->filter_area,
&task->rect,
&task->radius,
&task->pca_threshold,
- &task->buffer.pass_stride};
+ &task->buffer.pass_stride,
+ &task->buffer.frame_stride,
+ &task->buffer.use_time};
CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
cuda_assert(cuCtxSynchronize());
@@ -1383,6 +1387,7 @@ public:
bool denoising_accumulate(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
+ int frame,
DenoisingTask *task)
{
if(have_error())
@@ -1398,6 +1403,8 @@ public:
int w = task->reconstruction_state.source_w;
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
int pass_stride = task->buffer.pass_stride;
int num_shifts = (2*r+1)*(2*r+1);
@@ -1430,10 +1437,12 @@ public:
&w, &h,
&stride, &pass_stride,
&r, &pass_stride,
+ &frame_offset,
&a, &k_2};
void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *construct_gramian_args[] = {&blurDifference,
+ void *construct_gramian_args[] = {&t,
+ &blurDifference,
&task->buffer.mem.device_pointer,
&task->storage.transform.device_pointer,
&task->storage.rank.device_pointer,
@@ -1442,7 +1451,9 @@ public:
&task->reconstruction_state.filter_window,
&w, &h, &stride,
&pass_stride, &r,
- &f};
+ &f,
+ &frame_offset,
+ &task->buffer.use_time};
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
@@ -1635,7 +1646,7 @@ public:
void denoise(RenderTile &rtile, DenoisingTask& denoising)
{
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
- denoising.functions.accumulate = function_bind(&CUDADevice::denoising_accumulate, this, _1, _2, _3, &denoising);
+ denoising.functions.accumulate = function_bind(&CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 724171c3acb..61e0ba47ab8 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -36,6 +36,7 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising_feature_strength));
}
+ render_buffer.frame_stride = task.frame_stride;
render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data;
@@ -49,6 +50,12 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
tile_info->from_render = task.denoising_from_render? 1 : 0;
+ tile_info->frames[0] = 0;
+ tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
+ for(int i = 1; i < tile_info->num_frames; i++) {
+ tile_info->frames[i] = task.denoising_frames[i-1];
+ }
+
write_passes = task.denoising_write_passes;
do_filter = task.denoising_do_filter;
}
@@ -101,16 +108,18 @@ void DenoisingTask::setup_denoising_buffer()
rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
- buffer.use_intensity = write_passes;
+ buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y;
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
+ buffer.frame_stride = buffer.pass_stride * buffer.passes;
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
- int mem_size = align_up(buffer.pass_stride * buffer.passes + 4, alignment_floats);
+ int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false);
+ buffer.use_time = (tile_info->num_frames > 1);
/* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers;
@@ -216,6 +225,25 @@ void DenoisingTask::prefilter_color()
}
}
+void DenoisingTask::load_buffer()
+{
+ device_ptr null_ptr = (device_ptr) 0;
+
+ int original_offset = render_buffer.offset;
+
+ int num_passes = buffer.use_intensity? 15 : 14;
+ for(int i = 0; i < tile_info->num_frames; i++) {
+ for(int pass = 0; pass < num_passes; pass++) {
+ device_sub_ptr to_pass(buffer.mem, i*buffer.frame_stride + pass*buffer.pass_stride, buffer.pass_stride);
+ bool is_variance = (pass >= 11) && (pass <= 13);
+ functions.get_feature(pass, -1, *to_pass, null_ptr, is_variance? (1.0f / render_buffer.samples) : 1.0f);
+ }
+ render_buffer.offset += render_buffer.frame_stride;
+ }
+
+ render_buffer.offset = original_offset;
+}
+
void DenoisingTask::write_buffer()
{
reconstruction_state.buffer_params = make_int4(target_buffer.offset,
@@ -259,11 +287,17 @@ void DenoisingTask::reconstruct()
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
-
- device_ptr scale_ptr = 0;
- device_sub_ptr *scale_sub_ptr = NULL;
- functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr);
- delete scale_sub_ptr;
+ for(int f = 0; f < tile_info->num_frames; f++) {
+ device_ptr scale_ptr = 0;
+ device_sub_ptr *scale_sub_ptr = NULL;
+ if(tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
+ scale_sub_ptr = new device_sub_ptr(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride);
+ scale_ptr = **scale_sub_ptr;
+ }
+
+ functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
+ delete scale_sub_ptr;
+ }
functions.solve(target_buffer.ptr);
}
@@ -276,9 +310,14 @@ void DenoisingTask::run_denoising(RenderTile *tile)
setup_denoising_buffer();
- prefilter_shadowing();
- prefilter_features();
- prefilter_color();
+ if(tile_info->from_render) {
+ prefilter_shadowing();
+ prefilter_features();
+ prefilter_color();
+ }
+ else {
+ load_buffer();
+ }
if(do_filter) {
construct_transform();
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index cddcd3bd0c9..5869aa05390 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -38,6 +38,7 @@ public:
struct RenderBuffers {
int offset;
int pass_stride;
+ int frame_stride;
int samples;
} render_buffer;
@@ -70,7 +71,8 @@ public:
)> non_local_means;
function<bool(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr scale_ptr
+ device_ptr scale_ptr,
+ int frame
)> accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
@@ -156,8 +158,10 @@ public:
int stride;
int h;
int width;
+ int frame_stride;
device_only_memory<float> mem;
device_only_memory<float> temporary_mem;
+ bool use_time;
bool use_intensity;
bool gpu_temporary_mem;
@@ -179,6 +183,7 @@ protected:
void construct_transform();
void reconstruct();
+ void load_buffer();
void write_buffer();
};
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 97bcde99af6..2871bc5761a 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -73,11 +73,13 @@ public:
float denoising_feature_strength;
bool denoising_relative_pca;
bool denoising_from_render;
+ vector<int> denoising_frames;
bool denoising_do_filter;
bool denoising_write_passes;
int pass_stride;
+ int frame_stride;
int target_pass_stride;
int pass_denoising_data;
int pass_denoising_clean;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 4d42ddc0c53..9b763167459 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -422,6 +422,7 @@ protected:
bool denoising_accumulate(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
+ int frame,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr,
DenoisingTask *task);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index a0a1cf68c32..4417065bb7f 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -821,16 +821,31 @@ bool OpenCLDeviceBase::denoising_construct_transform(DenoisingTask *task)
cl_mem buffer_mem = CL_MEM_PTR(task->buffer.mem.device_pointer);
cl_mem transform_mem = CL_MEM_PTR(task->storage.transform.device_pointer);
cl_mem rank_mem = CL_MEM_PTR(task->storage.rank.device_pointer);
+ cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
+
+ char use_time = task->buffer.use_time? 1 : 0;
cl_kernel ckFilterConstructTransform = denoising_program(ustring("filter_construct_transform"));
- kernel_set_args(ckFilterConstructTransform, 0,
- buffer_mem,
+ int arg_ofs = kernel_set_args(ckFilterConstructTransform, 0,
+ buffer_mem,
+ tile_info_mem);
+ cl_mem buffers[9];
+ for(int i = 0; i < 9; i++) {
+ buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
+ arg_ofs += kernel_set_args(ckFilterConstructTransform,
+ arg_ofs,
+ buffers[i]);
+ }
+ kernel_set_args(ckFilterConstructTransform,
+ arg_ofs,
transform_mem,
rank_mem,
task->filter_area,
task->rect,
task->buffer.pass_stride,
+ task->buffer.frame_stride,
+ use_time,
task->radius,
task->pca_threshold);
@@ -845,6 +860,7 @@ bool OpenCLDeviceBase::denoising_construct_transform(DenoisingTask *task)
bool OpenCLDeviceBase::denoising_accumulate(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
+ int frame,
DenoisingTask *task)
{
cl_mem color_mem = CL_MEM_PTR(color_ptr);
@@ -865,6 +881,9 @@ bool OpenCLDeviceBase::denoising_accumulate(device_ptr color_ptr,
int w = task->reconstruction_state.source_w;
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
+ char use_time = task->buffer.use_time? 1 : 0;
int r = task->radius;
int pass_stride = task->buffer.pass_stride;
@@ -884,6 +903,7 @@ bool OpenCLDeviceBase::denoising_accumulate(device_ptr color_ptr,
pass_stride,
r,
pass_stride,
+ frame_offset,
1.0f, task->nlm_k_2);
kernel_set_args(ckNLMBlur, 0,
difference_mem,
@@ -898,6 +918,7 @@ bool OpenCLDeviceBase::denoising_accumulate(device_ptr color_ptr,
pass_stride,
r, 4);
kernel_set_args(ckNLMConstructGramian, 0,
+ t,
blurDifference_mem,
buffer_mem,
transform_mem,
@@ -907,7 +928,9 @@ bool OpenCLDeviceBase::denoising_accumulate(device_ptr color_ptr,
task->reconstruction_state.filter_window,
w, h, stride,
pass_stride,
- r, 4);
+ r, 4,
+ frame_offset,
+ use_time);
enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true);
enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
@@ -1108,7 +1131,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising)
{
denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
- denoising.functions.accumulate = function_bind(&OpenCLDeviceBase::denoising_accumulate, this, _1, _2, _3, &denoising);
+ denoising.functions.accumulate = function_bind(&OpenCLDeviceBase::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.solve = function_bind(&OpenCLDeviceBase::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);