Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2017-05-18 04:03:18 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2017-05-18 22:55:56 +0300
commit740cd287485d919452fa4cd56a700cc0070f0c6a (patch)
tree1c5b9c0528b7b25d26835d16adee7a6968d43fb3 /intern/cycles
parentb3a3459e1ae4a79d03887ea03c20bf1ccc8c8aa0 (diff)
Cycles Denoising: Add more robust outlier heuristic to avoid artifacts
Extremely bright pixels in the rendered image cause the denoising algorithm to produce extremely noticable artifacts. Therefore, a heuristic is needed to exclude these pixels from the filtering process. The new approach calculates the 75% percentile of the 5x5 neighborhood of each pixel and flags the pixel if it is more than twice as bright. During the reconstruction process, flagged pixels are skipped. Therefore, they don't cause any problems for neighboring pixels, and the outlier pixels themselves are replaced by a prediction of their actual value based on their feature pass values and the neighboring pixels. Therefore, the denoiser now also works as a smarter despeckling filter that uses a more accurate prediction of the pixel instead of a simple average. This can be used even if denoising isn't wanted by setting the denoising radius to 1.
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/device/device_cpu.cpp23
-rw-r--r--intern/cycles/device/device_cuda.cpp33
-rw-r--r--intern/cycles/device/device_denoising.cpp16
-rw-r--r--intern/cycles/device/device_denoising.h5
-rw-r--r--intern/cycles/device/opencl/opencl.h5
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp29
-rw-r--r--intern/cycles/kernel/filter/filter_features.h8
-rw-r--r--intern/cycles/kernel/filter/filter_features_sse.h4
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h51
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h5
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h8
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h15
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu16
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl14
14 files changed, 224 insertions, 8 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index e219ce3cb8e..fea55049049 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -176,6 +176,7 @@ public:
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -210,6 +211,7 @@ public:
REGISTER_KERNEL(shader),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
+ REGISTER_KERNEL(filter_detect_outliers),
REGISTER_KERNEL(filter_combine_halves),
REGISTER_KERNEL(filter_nlm_calc_difference),
REGISTER_KERNEL(filter_nlm_blur),
@@ -594,6 +596,26 @@ public:
return true;
}
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ for(int y = task->rect.y; y < task->rect.w; y++) {
+ for(int x = task->rect.x; x < task->rect.z; x++) {
+ filter_detect_outliers_kernel()(x, y,
+ (float*) image_ptr,
+ (float*) variance_ptr,
+ (float*) depth_ptr,
+ (float*) output_ptr,
+ &task->rect.x,
+ task->buffer.pass_stride);
+ }
+ }
+ return true;
+ }
+
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
float *render_buffer = (float*)tile.buffer;
@@ -632,6 +654,7 @@ public:
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 968ee5bc487..4d8ca487799 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1248,6 +1248,38 @@ public:
return !have_error();
}
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ if(have_error())
+ return false;
+
+ cuda_push_context();
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers,
+ task->rect.z-task->rect.x,
+ task->rect.w-task->rect.y);
+
+ void *args[] = {&image_ptr,
+ &variance_ptr,
+ &depth_ptr,
+ &output_ptr,
+ &task->rect,
+ &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ cuda_pop_context();
+ return !have_error();
+ }
+
void denoise(RenderTile &rtile, const DeviceTask &task)
{
DenoisingTask denoising(this);
@@ -1258,6 +1290,7 @@ public:
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index f5543d7c82d..613bd9112cf 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising()
int mean_to[] = { 8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
+
+ device_only_memory<float> temp_color;
+ temp_color.resize(3*buffer.pass_stride);
+ device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
+
for(int pass = 0; pass < num_color_passes; pass++) {
- device_sub_ptr color_pass (device, buffer.mem, mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
}
+
+ {
+ device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
+ }
+
+ device->mem_free(temp_color);
}
storage.w = filter_area.z;
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 86d8eb64386..25b93c2ad74 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -82,6 +82,11 @@ public:
device_ptr mean_ptr,
device_ptr variance_ptr
)> get_feature;
+ function<bool(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr
+ )> detect_outliers;
function<bool(device_ptr*)> set_tiles;
} functions;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index a458ca6bf64..27e196d1e68 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -411,6 +411,11 @@ protected:
device_ptr mean_ptr,
device_ptr variance_ptr,
DenoisingTask *task);
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task);
bool denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index ae1a7b917c3..8f33014a00e 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
+ denoising_program.add_kernel(ustring("filter_detect_outliers"));
denoising_program.add_kernel(ustring("filter_combine_halves"));
denoising_program.add_kernel(ustring("filter_construct_transform"));
denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
@@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
return true;
}
+bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+{
+ cl_mem image_mem = CL_MEM_PTR(image_ptr);
+ cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
+ cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
+ cl_mem output_mem = CL_MEM_PTR(output_ptr);
+
+ cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
+
+ kernel_set_args(ckFilterDetectOutliers, 0,
+ image_mem,
+ variance_mem,
+ depth_mem,
+ output_mem,
+ task->rect,
+ task->buffer.pass_stride);
+ enqueue_kernel(ckFilterDetectOutliers,
+ task->rect.z-task->rect.x,
+ task->rect.w-task->rect.y);
+
+ return true;
+}
+
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task)
{
@@ -942,6 +970,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&OpenCLDeviceBase::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&OpenCLDeviceBase::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample;
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index f5a40d49997..41998c792b6 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-#define ccl_get_feature(buffer, pass) buffer[(pass)*pass_stride]
+#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).
* pixel_buffer always points to the current pixel in the first pass. */
@@ -32,7 +32,7 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest
{
features[0] = pixel.x;
features[1] = pixel.y;
- features[2] = ccl_get_feature(buffer, 0);
+ features[2] = fabsf(ccl_get_feature(buffer, 0));
features[3] = ccl_get_feature(buffer, 1);
features[4] = ccl_get_feature(buffer, 2);
features[5] = ccl_get_feature(buffer, 3);
@@ -50,7 +50,7 @@ ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float cc
{
scales[0] = fabsf(pixel.x - mean[0]);
scales[1] = fabsf(pixel.y - mean[1]);
- scales[2] = fabsf(ccl_get_feature(buffer, 0) - mean[2]);
+ scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
ccl_get_feature(buffer, 2) - mean[4],
ccl_get_feature(buffer, 3) - mean[5]));
@@ -107,7 +107,7 @@ ccl_device_inline void filter_get_design_row_transform(int2 p_pixel,
math_vector_zero(design_row+1, rank);
design_row_add(design_row, rank, transform, stride, 0, q_pixel.x - p_pixel.x);
design_row_add(design_row, rank, transform, stride, 1, q_pixel.y - p_pixel.y);
- design_row_add(design_row, rank, transform, stride, 2, ccl_get_feature(q_buffer, 0) - ccl_get_feature(p_buffer, 0));
+ design_row_add(design_row, rank, transform, stride, 2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
design_row_add(design_row, rank, transform, stride, 3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
design_row_add(design_row, rank, transform, stride, 4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
design_row_add(design_row, rank, transform, stride, 5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 303c8f482e3..a242a8ed0a1 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -37,7 +37,7 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active
{
features[0] = x;
features[1] = y;
- features[2] = ccl_get_feature_sse(0);
+ features[2] = _mm_fabs_ps(ccl_get_feature_sse(0));
features[3] = ccl_get_feature_sse(1);
features[4] = ccl_get_feature_sse(2);
features[5] = ccl_get_feature_sse(3);
@@ -58,7 +58,7 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128
scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
- scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(0), mean[2])), active_pixels);
+ scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels);
__m128 diff, scale;
diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 54bcf888052..252bcc5e675 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -104,6 +104,57 @@ ccl_device void kernel_filter_get_feature(int sample,
}
}
+ccl_device void kernel_filter_detect_outliers(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *out,
+ int4 rect,
+ int pass_stride)
+{
+ int buffer_w = align_up(rect.z - rect.x, 4);
+
+ int n = 0;
+ float values[25];
+ for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
+ for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
+ int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
+ float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+ /* Find the position of L. */
+ int i;
+ for(i = 0; i < n; i++) {
+ if(values[i] > L) break;
+ }
+ /* Make space for L by shifting all following values to the right. */
+ for(int j = n; j > i; j--) {
+ values[j] = values[j-1];
+ }
+ /* Insert L. */
+ values[i] = L;
+ n++;
+ }
+ }
+
+ int idx = (y-rect.y)*buffer_w + (x-rect.x);
+ float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+ float ref = 2.0f*values[(int)(n*0.75f)];
+ float fac = 1.0f;
+ if(L > ref) {
+ /* If the pixel is an outlier, negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ fac = ref/L;
+ variance[idx ] *= fac*fac;
+ variance[idx + pass_stride] *= fac*fac;
+ variance[idx+2*pass_stride] *= fac*fac;
+ }
+ out[idx ] = fac*image[idx];
+ out[idx + pass_stride] = fac*image[idx + pass_stride];
+ out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
+}
+
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
ccl_device void kernel_filter_combine_halves(int x, int y,
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 02f3802fa0c..6a7c86e4012 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -54,7 +54,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
- if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) {
+ /* If the pixel was flagged as an outlier during prefiltering, skip it.
+ * Otherwise, perform the regular confidence interval test. */
+ if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
+ average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
return;
}
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 10007ee2635..9708b4b5b58 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -43,6 +43,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int buffer_denoising_offset,
bool use_split_variance);
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int *rect,
+ int pass_stride);
+
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 3b71e50ca3b..15325abdccd 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -91,6 +91,21 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
#endif
}
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int *rect,
+ int pass_stride)
+{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+#else
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+#endif
+}
+
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 50f73f9728d..f812a6601c6 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -88,6 +88,22 @@ kernel_cuda_filter_get_feature(int sample,
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_detect_outliers(float *image,
+ float *variance,
+ float *depth,
+ float *output,
+ int4 prefilter_rect,
+ int pass_stride)
+{
+ int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
+ int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < prefilter_rect.z && y < prefilter_rect.w) {
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 3d82bff9892..fbc3daa62b9 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -78,6 +78,20 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
}
}
+__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int4 prefilter_rect,
+ int pass_stride)
+{
+ int x = prefilter_rect.x + get_global_id(0);
+ int y = prefilter_rect.y + get_global_id(1);
+ if(x < prefilter_rect.z && y < prefilter_rect.w) {
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+ }
+}
+
__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
ccl_global float *variance,
ccl_global float *a,