diff options
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_features.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_nlm_cpu.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_nlm_gpu.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_prefilter.h | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_reconstruction.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 3 |
15 files changed, 36 insertions, 80 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c2f74aa8903..29bb1f91a40 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -185,9 +185,9 @@ public: KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel; KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel; - KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel; - KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; + KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel; + KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*, ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int, @@ -465,8 +465,6 @@ public: bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -485,8 +483,8 @@ public: task->reconstruction_state.source_w - max(0, dx), task->reconstruction_state.source_h - max(0, dy)}; filter_nlm_calc_difference_kernel()(dx, dy, - (float*) guide_ptr, - (float*) guide_variance_ptr, + (float*) color_ptr, + (float*) color_variance_ptr, difference, local_rect, task->buffer.w, @@ -499,8 +497,6 @@ public: filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, (float*) task->buffer.mem.device_pointer, - (float*) color_ptr, - (float*) color_variance_ptr, (float*) task->storage.transform.device_pointer, (int*) task->storage.rank.device_pointer, (float*) task->storage.XtWX.device_pointer, @@ -648,7 +644,7 @@ public: DenoisingTask denoising(this); denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 99537e9a983..29fa08d94b1 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1051,8 +1051,6 @@ public: bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -1096,8 +1094,8 @@ public: task->reconstruction_state.source_h - max(0, dy)}; void *calc_difference_args[] = {&dx, &dy, - &guide_ptr, - &guide_variance_ptr, + &color_ptr, + &color_variance_ptr, &difference, &local_rect, &task->buffer.w, @@ -1126,8 +1124,6 @@ public: void *construct_gramian_args[] = {&dx, &dy, &blurDifference, &task->buffer.mem.device_pointer, - &color_ptr, - &color_variance_ptr, &task->storage.transform.device_pointer, &task->storage.rank.device_pointer, &task->storage.XtWX.device_pointer, @@ -1294,7 +1290,7 @@ public: DenoisingTask denoising(this); denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 613bd9112cf..619cc1d171e 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -215,7 +215,7 @@ bool DenoisingTask::run_denoising() { device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - functions.reconstruct(*color_ptr, *color_var_ptr, *color_ptr, *color_var_ptr, render_buffer.ptr); + functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } device->mem_free(storage.XtWX); diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 25b93c2ad74..def7b72f67d 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -58,8 +58,6 @@ public: )> non_local_means; function<bool(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr )> reconstruct; function<bool()> construct_transform; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 27e196d1e68..52851061d7b 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -390,8 +390,6 @@ protected: bool denoising_construct_transform(DenoisingTask *task); bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task); bool denoising_combine_halves(device_ptr a_ptr, diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 24b70e3446c..e4ab979dcbf 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -693,8 +693,6 @@ bool OpenCLDeviceBase::denoising_construct_transform(DenoisingTask *task) bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -703,8 +701,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, cl_mem color_mem = CL_MEM_PTR(color_ptr); cl_mem color_variance_mem = CL_MEM_PTR(color_variance_ptr); - cl_mem guide_mem = CL_MEM_PTR(guide_ptr); - cl_mem guide_variance_mem = CL_MEM_PTR(guide_variance_ptr); cl_mem output_mem = CL_MEM_PTR(output_ptr); cl_mem buffer_mem = CL_MEM_PTR(task->buffer.mem.device_pointer); @@ -735,8 +731,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, kernel_set_args(ckNLMCalcDifference, 0, dx, dy, - guide_mem, - guide_variance_mem, + color_mem, + color_variance_mem, difference, local_rect, task->buffer.w, @@ -775,8 +771,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, dx, dy, blurDifference, buffer_mem, - color_mem, - color_variance_mem, transform_mem, rank_mem, XtWX_mem, @@ -961,7 +955,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task) denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising); denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index 53d703de143..6226ed2c2ef 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -78,16 +78,10 @@ ccl_device_inline void filter_calculate_scale(float *scale) scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f); } -ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer, - int pass_stride) +ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer, + int pass_stride) { - return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)); -} - -ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer, - int pass_stride) -{ - return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2))); + return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10)); } ccl_device_inline void design_row_add(float *design_row, diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h index 5cb4038bc33..88afc00ccb3 100644 --- a/intern/cycles/kernel/filter/filter_nlm_cpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h @@ -133,8 +133,6 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -167,7 +165,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, l_transform, l_rank, weight, l_XtWX, l_XtWY, 0); } diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h index 078c5f56763..62bd5be1de5 100644 --- a/intern/cycles/kernel/filter/filter_nlm_gpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h @@ -97,8 +97,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -130,7 +128,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, transform, rank, weight, XtWX, XtWY, localIdx); diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 82cc36625ec..d5ae1b73927 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -142,13 +142,22 @@ ccl_device void kernel_filter_detect_outliers(int x, int y, float ref = 2.0f*values[(int)(n*0.75f)]; float fac = 1.0f; if(L > ref) { - /* If the pixel is an outlier, negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ - depth[idx] = -depth[idx]; - fac = ref/L; - variance[idx ] *= fac*fac; - variance[idx + pass_stride] *= fac*fac; - variance[idx+2*pass_stride] *= fac*fac; + /* The pixel appears to be an outlier. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel + * should actually be at the reference value: + * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. + * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + */ + float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]))); + if(L - 3*stddev < ref) { + /* The pixel is an outlier, so negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + fac = ref/L; + variance[idx ] *= fac*fac; + variance[idx + pass_stride] *= fac*fac; + variance[idx+2*pass_stride] *= fac*fac; + } } out[idx ] = fac*image[idx]; out[idx + pass_stride] = fac*image[idx + pass_stride]; diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h index 4a4c81b7ba3..90a2816ddf7 100644 --- a/intern/cycles/kernel/filter/filter_reconstruction.h +++ b/intern/cycles/kernel/filter/filter_reconstruction.h @@ -22,8 +22,6 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int w, int h, int pass_stride, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, float weight, @@ -48,21 +46,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, float design_row[DENOISE_FEATURES+1]; #endif - float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride); - float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride); + float3 q_color = filter_get_color(buffer + q_offset, pass_stride); - float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride)); - float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride)); - - /* If the pixel was flagged as an outlier during prefiltering, skip it. - * Otherwise, perform the regular confidence interval test unless - * the center pixel is an outlier (in that case, using the confidence - * interval test could result in no pixels being used at all). */ - bool p_outlier = (ccl_get_feature(buffer + p_offset, 0) < 0.0f); - bool q_outlier = (ccl_get_feature(buffer + q_offset, 0) < 0.0f); - bool outside_of_interval = (average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)); - - if(q_outlier || (!p_outlier && outside_of_interval)) { + /* If the pixel was flagged as an outlier during prefiltering, skip it. */ + if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) { return; } diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index ffd34c293fc..2ed713299fd 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 261176846b1..8dc1a8d583c 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); + kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); #endif } diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 2edbff08087..009c3fde9d5 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float const* __restrict__ transform, int *rank, float *XtWX, @@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 0462ca6f9bc..ba53ba4b26f 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, |