Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_cpu.cpp23
-rw-r--r--intern/cycles/device/device_cuda.cpp33
-rw-r--r--intern/cycles/device/device_denoising.cpp16
-rw-r--r--intern/cycles/device/device_denoising.h5
-rw-r--r--intern/cycles/device/opencl/opencl.h5
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp29
-rw-r--r--intern/cycles/kernel/filter/filter_features.h8
-rw-r--r--intern/cycles/kernel/filter/filter_features_sse.h4
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h51
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h5
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h8
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h15
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu16
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl14
14 files changed, 224 insertions, 8 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index e219ce3cb8e..fea55049049 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -176,6 +176,7 @@ public:
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -210,6 +211,7 @@ public:
REGISTER_KERNEL(shader),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
+ REGISTER_KERNEL(filter_detect_outliers),
REGISTER_KERNEL(filter_combine_halves),
REGISTER_KERNEL(filter_nlm_calc_difference),
REGISTER_KERNEL(filter_nlm_blur),
@@ -594,6 +596,26 @@ public:
return true;
}
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ for(int y = task->rect.y; y < task->rect.w; y++) {
+ for(int x = task->rect.x; x < task->rect.z; x++) {
+ filter_detect_outliers_kernel()(x, y,
+ (float*) image_ptr,
+ (float*) variance_ptr,
+ (float*) depth_ptr,
+ (float*) output_ptr,
+ &task->rect.x,
+ task->buffer.pass_stride);
+ }
+ }
+ return true;
+ }
+
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
float *render_buffer = (float*)tile.buffer;
@@ -632,6 +654,7 @@ public:
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 968ee5bc487..4d8ca487799 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1248,6 +1248,38 @@ public:
return !have_error();
}
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ if(have_error())
+ return false;
+
+ cuda_push_context();
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers,
+ task->rect.z-task->rect.x,
+ task->rect.w-task->rect.y);
+
+ void *args[] = {&image_ptr,
+ &variance_ptr,
+ &depth_ptr,
+ &output_ptr,
+ &task->rect,
+ &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ cuda_pop_context();
+ return !have_error();
+ }
+
void denoise(RenderTile &rtile, const DeviceTask &task)
{
DenoisingTask denoising(this);
@@ -1258,6 +1290,7 @@ public:
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index f5543d7c82d..613bd9112cf 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising()
int mean_to[] = { 8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
+
+ device_only_memory<float> temp_color;
+ temp_color.resize(3*buffer.pass_stride);
+ device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
+
for(int pass = 0; pass < num_color_passes; pass++) {
- device_sub_ptr color_pass (device, buffer.mem, mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
}
+
+ {
+ device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
+ }
+
+ device->mem_free(temp_color);
}
storage.w = filter_area.z;
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 86d8eb64386..25b93c2ad74 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -82,6 +82,11 @@ public:
device_ptr mean_ptr,
device_ptr variance_ptr
)> get_feature;
+ function<bool(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr
+ )> detect_outliers;
function<bool(device_ptr*)> set_tiles;
} functions;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index a458ca6bf64..27e196d1e68 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -411,6 +411,11 @@ protected:
device_ptr mean_ptr,
device_ptr variance_ptr,
DenoisingTask *task);
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task);
bool denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index ae1a7b917c3..8f33014a00e 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
+ denoising_program.add_kernel(ustring("filter_detect_outliers"));
denoising_program.add_kernel(ustring("filter_combine_halves"));
denoising_program.add_kernel(ustring("filter_construct_transform"));
denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
@@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
return true;
}
+bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+{
+ cl_mem image_mem = CL_MEM_PTR(image_ptr);
+ cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
+ cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
+ cl_mem output_mem = CL_MEM_PTR(output_ptr);
+
+ cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
+
+ kernel_set_args(ckFilterDetectOutliers, 0,
+ image_mem,
+ variance_mem,
+ depth_mem,
+ output_mem,
+ task->rect,
+ task->buffer.pass_stride);
+ enqueue_kernel(ckFilterDetectOutliers,
+ task->rect.z-task->rect.x,
+ task->rect.w-task->rect.y);
+
+ return true;
+}
+
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task)
{
@@ -942,6 +970,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&OpenCLDeviceBase::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.detect_outliers = function_bind(&OpenCLDeviceBase::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample;
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index f5a40d49997..41998c792b6 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-#define ccl_get_feature(buffer, pass) buffer[(pass)*pass_stride]
+#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).
* pixel_buffer always points to the current pixel in the first pass. */
@@ -32,7 +32,7 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest
{
features[0] = pixel.x;
features[1] = pixel.y;
- features[2] = ccl_get_feature(buffer, 0);
+ features[2] = fabsf(ccl_get_feature(buffer, 0));
features[3] = ccl_get_feature(buffer, 1);
features[4] = ccl_get_feature(buffer, 2);
features[5] = ccl_get_feature(buffer, 3);
@@ -50,7 +50,7 @@ ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float cc
{
scales[0] = fabsf(pixel.x - mean[0]);
scales[1] = fabsf(pixel.y - mean[1]);
- scales[2] = fabsf(ccl_get_feature(buffer, 0) - mean[2]);
+ scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
ccl_get_feature(buffer, 2) - mean[4],
ccl_get_feature(buffer, 3) - mean[5]));
@@ -107,7 +107,7 @@ ccl_device_inline void filter_get_design_row_transform(int2 p_pixel,
math_vector_zero(design_row+1, rank);
design_row_add(design_row, rank, transform, stride, 0, q_pixel.x - p_pixel.x);
design_row_add(design_row, rank, transform, stride, 1, q_pixel.y - p_pixel.y);
- design_row_add(design_row, rank, transform, stride, 2, ccl_get_feature(q_buffer, 0) - ccl_get_feature(p_buffer, 0));
+ design_row_add(design_row, rank, transform, stride, 2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
design_row_add(design_row, rank, transform, stride, 3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
design_row_add(design_row, rank, transform, stride, 4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
design_row_add(design_row, rank, transform, stride, 5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 303c8f482e3..a242a8ed0a1 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -37,7 +37,7 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active
{
features[0] = x;
features[1] = y;
- features[2] = ccl_get_feature_sse(0);
+ features[2] = _mm_fabs_ps(ccl_get_feature_sse(0));
features[3] = ccl_get_feature_sse(1);
features[4] = ccl_get_feature_sse(2);
features[5] = ccl_get_feature_sse(3);
@@ -58,7 +58,7 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128
scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
- scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(0), mean[2])), active_pixels);
+ scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels);
__m128 diff, scale;
diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 54bcf888052..252bcc5e675 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -104,6 +104,57 @@ ccl_device void kernel_filter_get_feature(int sample,
}
}
+ccl_device void kernel_filter_detect_outliers(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *out,
+ int4 rect,
+ int pass_stride)
+{
+ int buffer_w = align_up(rect.z - rect.x, 4);
+
+ int n = 0;
+ float values[25];
+ for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
+ for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
+ int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
+ float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+ /* Find the position of L. */
+ int i;
+ for(i = 0; i < n; i++) {
+ if(values[i] > L) break;
+ }
+ /* Make space for L by shifting all following values to the right. */
+ for(int j = n; j > i; j--) {
+ values[j] = values[j-1];
+ }
+ /* Insert L. */
+ values[i] = L;
+ n++;
+ }
+ }
+
+ int idx = (y-rect.y)*buffer_w + (x-rect.x);
+ float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+ float ref = 2.0f*values[(int)(n*0.75f)];
+ float fac = 1.0f;
+ if(L > ref) {
+ /* If the pixel is an outlier, negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ fac = ref/L;
+ variance[idx ] *= fac*fac;
+ variance[idx + pass_stride] *= fac*fac;
+ variance[idx+2*pass_stride] *= fac*fac;
+ }
+ out[idx ] = fac*image[idx];
+ out[idx + pass_stride] = fac*image[idx + pass_stride];
+ out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
+}
+
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
ccl_device void kernel_filter_combine_halves(int x, int y,
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 02f3802fa0c..6a7c86e4012 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -54,7 +54,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
- if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) {
+ /* If the pixel was flagged as an outlier during prefiltering, skip it.
+ * Otherwise, perform the regular confidence interval test. */
+ if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
+ average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
return;
}
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 10007ee2635..9708b4b5b58 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -43,6 +43,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int buffer_denoising_offset,
bool use_split_variance);
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int *rect,
+ int pass_stride);
+
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 3b71e50ca3b..15325abdccd 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -91,6 +91,21 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
#endif
}
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+ ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int *rect,
+ int pass_stride)
+{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+#else
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+#endif
+}
+
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 50f73f9728d..f812a6601c6 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -88,6 +88,22 @@ kernel_cuda_filter_get_feature(int sample,
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_detect_outliers(float *image,
+ float *variance,
+ float *depth,
+ float *output,
+ int4 prefilter_rect,
+ int pass_stride)
+{
+ int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
+ int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < prefilter_rect.z && y < prefilter_rect.w) {
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 3d82bff9892..fbc3daa62b9 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -78,6 +78,20 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
}
}
+__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
+ ccl_global float *variance,
+ ccl_global float *depth,
+ ccl_global float *output,
+ int4 prefilter_rect,
+ int pass_stride)
+{
+ int x = prefilter_rect.x + get_global_id(0);
+ int y = prefilter_rect.y + get_global_id(1);
+ if(x < prefilter_rect.z && y < prefilter_rect.w) {
+ kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+ }
+}
+
__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
ccl_global float *variance,
ccl_global float *a,