diff options
author | Lukas Stockner <lukas.stockner@freenet.de> | 2019-02-06 16:19:20 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-02-06 17:18:42 +0300 |
commit | fccf506ed7fd96f8a8f5edda7b99f564a386321a (patch) | |
tree | 80a4d10012b13e1601011e5cf6d4771d0e382775 /intern/cycles/kernel/kernels | |
parent | c183ac73dcfd20d0acf5ca07a2b062deadc4d73a (diff) |
Cycles: animation denoising support in the kernel.
This is the internal implementation, not available from the API or
interface yet. The algorithm takes into account past and future frames,
both to get more coherent animation and to reduce noise.
Ref D3889.
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 25 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 20 |
4 files changed, 60 insertions, 13 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index 08333c7a455..02c85562db8 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -68,6 +68,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, int r); void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, + TileInfo *tiles, int x, int y, int storage_ofs, @@ -75,6 +76,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, int *rank, int* rect, int pass_stride, + int frame_stride, + bool use_time, int radius, float pca_threshold); @@ -87,6 +90,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, int* rect, int stride, int channel_offset, + int frame_offset, float a, float k_2); @@ -116,6 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, + int t, float *difference_image, float *buffer, float *transform, @@ -126,7 +131,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int *filter_window, int stride, int f, - int pass_stride); + int pass_stride, + int frame_offset, + bool use_time); void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, float *accum_image, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index b792367e3ab..c29505880cb 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -135,6 +135,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, } void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, + TileInfo *tile_info, int x, int y, int storage_ofs, @@ -142,6 +143,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, int *rank, int* prefilter_rect, int pass_stride, + int 
frame_stride, + bool use_time, int radius, float pca_threshold) { @@ -151,9 +154,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, rank += storage_ofs; transform += storage_ofs*TRANSFORM_SIZE; kernel_filter_construct_transform(buffer, + tile_info, x, y, load_int4(prefilter_rect), pass_stride, + frame_stride, + use_time, transform, rank, radius, @@ -170,6 +176,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, int *rect, int stride, int channel_offset, + int frame_offset, float a, float k_2) { @@ -184,6 +191,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, load_int4(rect), stride, channel_offset, + frame_offset, a, k_2); #endif } @@ -243,6 +251,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, + int t, float *difference_image, float *buffer, float *transform, @@ -253,12 +262,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int *filter_window, int stride, int f, - int pass_stride) + int pass_stride, + int frame_offset, + bool use_time) { #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, + kernel_filter_nlm_construct_gramian(dx, dy, t, difference_image, buffer, transform, rank, @@ -266,7 +277,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, load_int4(rect), load_int4(filter_window), stride, f, - pass_stride); + pass_stride, + frame_offset, + use_time); #endif } diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 3b51bb41aed..5b552b01413 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -29,7 +29,7 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_divide_shadow(int sample, - TileInfo 
*tile_info, + CCL_FILTER_TILE_INFO, float *unfilteredA, float *unfilteredB, float *sampleVariance, @@ -59,7 +59,7 @@ kernel_cuda_filter_divide_shadow(int sample, extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_get_feature(int sample, - TileInfo *tile_info, + CCL_FILTER_TILE_INFO, int m_offset, int v_offset, float *mean, @@ -138,10 +138,12 @@ kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_construct_transform(float const* __restrict__ buffer, + CCL_FILTER_TILE_INFO, float *transform, int *rank, int4 filter_area, int4 rect, int radius, float pca_threshold, - int pass_stride) + int pass_stride, int frame_stride, + bool use_time) { int x = blockDim.x*blockIdx.x + threadIdx.x; int y = blockDim.y*blockIdx.y + threadIdx.y; @@ -149,8 +151,11 @@ kernel_cuda_filter_construct_transform(float const* __restrict__ buffer, int *l_rank = rank + y*filter_area.z + x; float *l_transform = transform + y*filter_area.z + x; kernel_filter_construct_transform(buffer, + tile_info, x + filter_area.x, y + filter_area.y, - rect, pass_stride, + rect, + pass_stride, frame_stride, + use_time, l_transform, l_rank, radius, pca_threshold, filter_area.z*filter_area.w, @@ -170,6 +175,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, int pass_stride, int r, int channel_offset, + int frame_offset, float a, float k_2) { @@ -183,6 +189,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, difference_image + ofs, rect, stride, channel_offset, + frame_offset, a, k_2); } } @@ -274,7 +281,8 @@ kernel_cuda_filter_nlm_normalize(float *out_image, extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_image, 
+kernel_cuda_filter_nlm_construct_gramian(int t, + const float *ccl_restrict difference_image, const float *ccl_restrict buffer, float const* __restrict__ transform, int *rank, @@ -286,13 +294,16 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im int stride, int pass_stride, int r, - int f) + int f, + int frame_offset, + bool use_time) { int4 co, rect; int ofs; if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { kernel_filter_nlm_construct_gramian(co.x, co.y, co.z, co.w, + t, difference_image + ofs, buffer, transform, rank, @@ -300,6 +311,8 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im rect, filter_window, stride, f, pass_stride, + frame_offset, + use_time, threadIdx.y*blockDim.x + threadIdx.x); } } diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 8a821ee281d..996bc27f71b 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -127,11 +127,14 @@ __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean, } __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer, + CCL_FILTER_TILE_INFO, ccl_global float *transform, ccl_global int *rank, int4 filter_area, int4 rect, int pass_stride, + int frame_stride, + char use_time, int radius, float pca_threshold) { @@ -141,8 +144,11 @@ __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_ ccl_global int *l_rank = rank + y*filter_area.z + x; ccl_global float *l_transform = transform + y*filter_area.z + x; kernel_filter_construct_transform(buffer, + CCL_FILTER_TILE_INFO_ARG, x + filter_area.x, y + filter_area.y, - rect, pass_stride, + rect, + pass_stride, frame_stride, + use_time, l_transform, l_rank, radius, pca_threshold, filter_area.z*filter_area.w, @@ -160,6 +166,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const 
ccl_global float *ccl_ int pass_stride, int r, int channel_offset, + int frame_offset, float a, float k_2) { @@ -173,6 +180,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_ difference_image + ofs, rect, stride, channel_offset, + frame_offset, a, k_2); } } @@ -254,7 +262,8 @@ __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image, } } -__kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *ccl_restrict difference_image, +__kernel void kernel_ocl_filter_nlm_construct_gramian(int t, + const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, const ccl_global float *ccl_restrict transform, ccl_global int *rank, @@ -266,13 +275,16 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc int stride, int pass_stride, int r, - int f) + int f, + int frame_offset, + char use_time) { int4 co, rect; int ofs; if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { kernel_filter_nlm_construct_gramian(co.x, co.y, co.z, co.w, + t, difference_image + ofs, buffer, transform, rank, @@ -280,6 +292,8 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc rect, filter_window, stride, f, pass_stride, + frame_offset, + use_time, get_local_id(1)*get_local_size(0) + get_local_id(0)); } } |