diff options
author | Lukas Stockner <lukas.stockner@freenet.de> | 2019-02-06 14:42:10 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-02-06 17:18:29 +0300 |
commit | 405cacd4cd955552e1f7b50a176ddcdd9baf8d3b (patch) | |
tree | e54e2bf0c79bcc04d669088393b1d16df554bffd /intern/cycles/kernel/kernels | |
parent | 81159e99b819910b72cb3caba6b3cd4f35184ea9 (diff) |
Cycles: prefilter feature passes separate from denoising.
Prefiltering of feature passes will happen during rendering; the prefiltered
passes can then be used for denoising immediately or written as render passes
for later (animation) denoising.
The number of denoising data passes written is reduced because of this,
leaving out the feature variance passes. The passes are now Normal,
Albedo, Depth, Shadowing, Variance and Intensity.
Ref D3889.
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 56 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 36 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 34 |
4 files changed, 129 insertions, 11 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index e036b53b810..08333c7a455 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -37,10 +37,20 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, int y, float *mean, float *variance, + float scale, int* prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset); +void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, + int x, + int y, + int *buffer_params, + float *from, + float *buffer, + int out_offset, + int* prefilter_rect); + void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, ccl_global float *image, ccl_global float *variance, @@ -71,7 +81,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, int dy, float *weight_image, - float *variance, + float *variance_image, + float *scale_image, float *difference_image, int* rect, int stride, @@ -99,6 +110,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, float *out_image, float *accum_image, int* rect, + int channel_offset, int stride, int f); diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 4c758711481..b792367e3ab 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -69,6 +69,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, int x, int y, float *mean, float *variance, + float scale, int* prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) @@ -80,12 +81,29 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, m_offset, v_offset, x, y, mean, variance, + scale, load_int4(prefilter_rect), buffer_pass_stride, buffer_denoising_offset); #endif } +void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int 
sample, + int x, + int y, + int *buffer_params, + float *from, + float *buffer, + int out_offset, + int* prefilter_rect) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_write_feature); +#else + kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); +#endif +} + void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, ccl_global float *image, ccl_global float *variance, @@ -130,8 +148,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); #else - rank += storage_ofs; - transform += storage_ofs*TRANSFORM_SIZE; + rank += storage_ofs; + transform += storage_ofs*TRANSFORM_SIZE; kernel_filter_construct_transform(buffer, x, y, load_int4(prefilter_rect), @@ -146,7 +164,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, int dy, float *weight_image, - float *variance, + float *variance_image, + float *scale_image, float *difference_image, int *rect, int stride, @@ -157,7 +176,15 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); #else - kernel_filter_nlm_calc_difference(dx, dy, weight_image, variance, difference_image, load_int4(rect), stride, channel_offset, a, k_2); + kernel_filter_nlm_calc_difference(dx, dy, + weight_image, + variance_image, + scale_image, + difference_image, + load_int4(rect), + stride, + channel_offset, + a, k_2); #endif } @@ -195,13 +222,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, float *out_image, float *accum_image, int *rect, + int channel_offset, int stride, int f) { #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); #else - kernel_filter_nlm_update_output(dx, dy, difference_image, image, temp_image, out_image, accum_image, 
load_int4(rect), stride, f); + kernel_filter_nlm_update_output(dx, dy, + difference_image, + image, + temp_image, + out_image, + accum_image, + load_int4(rect), + channel_offset, + stride, f); #endif } @@ -222,7 +258,15 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_window), stride, f, pass_stride); + kernel_filter_nlm_construct_gramian(dx, dy, + difference_image, + buffer, + transform, rank, + XtWX, XtWY, + load_int4(rect), + load_int4(filter_window), + stride, f, + pass_stride); #endif } diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index b856cbde45c..3b51bb41aed 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -64,6 +64,7 @@ kernel_cuda_filter_get_feature(int sample, int v_offset, float *mean, float *variance, + float scale, int4 prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) @@ -76,6 +77,7 @@ kernel_cuda_filter_get_feature(int sample, m_offset, v_offset, x, y, mean, variance, + scale, prefilter_rect, buffer_pass_stride, buffer_denoising_offset); @@ -84,6 +86,30 @@ kernel_cuda_filter_get_feature(int sample, extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_filter_write_feature(int sample, + int4 buffer_params, + int4 filter_area, + float *from, + float *buffer, + int out_offset, + int4 prefilter_rect) +{ + int x = blockDim.x*blockIdx.x + threadIdx.x; + int y = blockDim.y*blockIdx.y + threadIdx.y; + if(x < filter_area.z && y < filter_area.w) { + kernel_filter_write_feature(sample, + x + filter_area.x, + y + filter_area.y, + buffer_params, + from, + buffer, + out_offset, + prefilter_rect); + } +} + +extern "C" __global__ void 
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_detect_outliers(float *image, float *variance, float *depth, @@ -136,6 +162,7 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, const float *ccl_restrict variance_image, + const float *ccl_restrict scale_image, float *difference_image, int w, int h, @@ -152,9 +179,11 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, weight_image, variance_image, + scale_image, difference_image + ofs, rect, stride, - channel_offset, a, k_2); + channel_offset, + a, k_2); } } @@ -210,6 +239,7 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image, int h, int stride, int pass_stride, + int channel_offset, int r, int f) { @@ -221,7 +251,9 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image, image, out_image, accum_image, - rect, stride, f); + rect, + channel_offset, + stride, f); } } diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index a550f97f4eb..8a821ee281d 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -56,6 +56,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample, int v_offset, ccl_global float *mean, ccl_global float *variance, + float scale, int4 prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) @@ -68,12 +69,35 @@ __kernel void kernel_ocl_filter_get_feature(int sample, m_offset, v_offset, x, y, mean, variance, + scale, prefilter_rect, buffer_pass_stride, buffer_denoising_offset); } } +__kernel void kernel_ocl_filter_write_feature(int sample, + int4 buffer_params, + int4 filter_area, + ccl_global float *from, + ccl_global float *buffer, + int out_offset, + int4 
prefilter_rect) +{ + int x = get_global_id(0); + int y = get_global_id(1); + if(x < filter_area.z && y < filter_area.w) { + kernel_filter_write_feature(sample, + x + filter_area.x, + y + filter_area.y, + buffer_params, + from, + buffer, + out_offset, + prefilter_rect); + } +} + __kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -128,6 +152,7 @@ __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_restrict weight_image, const ccl_global float *ccl_restrict variance_image, + const ccl_global float *ccl_restrict scale_image, ccl_global float *difference_image, int w, int h, @@ -144,9 +169,11 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_ kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, weight_image, variance_image, + scale_image, difference_image + ofs, rect, stride, - channel_offset, a, k_2); + channel_offset, + a, k_2); } } @@ -196,6 +223,7 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re int h, int stride, int pass_stride, + int channel_offset, int r, int f) { @@ -207,7 +235,9 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re image, out_image, accum_image, - rect, stride, f); + rect, + channel_offset, + stride, f); } } |