From a0cc7bd961ef2cd501468dce08998992a88b3bed Mon Sep 17 00:00:00 2001
From: Lukas Stockner
Date: Sat, 6 Oct 2018 20:39:01 +0200
Subject: Cycles: Implement vectorized NLM kernels for faster CPU denoising

---
 intern/cycles/device/device_cpu.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'intern/cycles/device/device_cpu.cpp')

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7c72ab1a009..4986bb809fb 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -184,11 +184,11 @@ public:
  KernelFunctions filter_detect_outliers_kernel;
  KernelFunctions filter_combine_halves_kernel;

- KernelFunctions filter_nlm_calc_difference_kernel;
- KernelFunctions filter_nlm_blur_kernel;
- KernelFunctions filter_nlm_calc_weight_kernel;
- KernelFunctions filter_nlm_update_output_kernel;
- KernelFunctions filter_nlm_normalize_kernel;
+ KernelFunctions filter_nlm_calc_difference_kernel;
+ KernelFunctions filter_nlm_blur_kernel;
+ KernelFunctions filter_nlm_calc_weight_kernel;
+ KernelFunctions filter_nlm_update_output_kernel;
+ KernelFunctions filter_nlm_normalize_kernel;

  KernelFunctions filter_construct_transform_kernel;
  KernelFunctions filter_nlm_construct_gramian_kernel;
@@ -475,6 +475,7 @@ public:
  float *blurDifference = temporary_mem;
  float *difference = temporary_mem + task->buffer.pass_stride;
  float *weightAccum = temporary_mem + 2*task->buffer.pass_stride;
+ float *temp_image = temporary_mem + 3*task->buffer.pass_stride;

  memset(weightAccum, 0, sizeof(float)*w*h);
  memset((float*) out_ptr, 0, sizeof(float)*w*h);
@@ -499,6 +500,7 @@ public:
  filter_nlm_update_output_kernel()(dx, dy,
  blurDifference,
  (float*) image_ptr,
+ temp_image,
  (float*) out_ptr,
  weightAccum,
  local_rect,
--
cgit v1.2.3