diff options
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 23 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 33 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 5 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 5 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 29 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_features.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_features_sse.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_prefilter.h | 51 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_reconstruction.h | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 15 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 14 |
14 files changed, 224 insertions, 8 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index e219ce3cb8e..fea55049049 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -176,6 +176,7 @@ public: KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel; KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel; @@ -210,6 +211,7 @@ public: REGISTER_KERNEL(shader), REGISTER_KERNEL(filter_divide_shadow), REGISTER_KERNEL(filter_get_feature), + REGISTER_KERNEL(filter_detect_outliers), REGISTER_KERNEL(filter_combine_halves), REGISTER_KERNEL(filter_nlm_calc_difference), REGISTER_KERNEL(filter_nlm_blur), @@ -594,6 +596,26 @@ public: return true; } + bool denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task) + { + for(int y = task->rect.y; y < task->rect.w; y++) { + for(int x = task->rect.x; x < task->rect.z; x++) { + filter_detect_outliers_kernel()(x, y, + (float*) image_ptr, + (float*) variance_ptr, + (float*) depth_ptr, + (float*) output_ptr, + &task->rect.x, + task->buffer.pass_stride); + } + } + return true; + } + void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { float *render_buffer = (float*)tile.buffer; @@ -632,6 +654,7 @@ public: denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); + denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising); denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 968ee5bc487..4d8ca487799 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1248,6 +1248,38 @@ public: return !have_error(); } + bool denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task) + { + if(have_error()) + return false; + + cuda_push_context(); + + CUfunction cuFilterDetectOutliers; + cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers")); + cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers, + task->rect.z-task->rect.x, + task->rect.w-task->rect.y); + + void *args[] = {&image_ptr, + &variance_ptr, + &depth_ptr, + &output_ptr, + &task->rect, + &task->buffer.pass_stride}; + + CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); + cuda_assert(cuCtxSynchronize()); + + cuda_pop_context(); + return !have_error(); + } + void denoise(RenderTile &rtile, const DeviceTask &task) { DenoisingTask denoising(this); @@ -1258,6 +1290,7 @@ public: denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); + denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising); denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index f5543d7c82d..613bd9112cf 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising() int mean_to[] = { 8, 9, 10}; int variance_to[] = {11, 12, 13}; int num_color_passes = 3; + + device_only_memory<float> temp_color; + temp_color.resize(3*buffer.pass_stride); + device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE); + for(int pass = 0; pass < num_color_passes; pass++) { - device_sub_ptr color_pass (device, buffer.mem, mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass); } + + { + device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); + } + + device->mem_free(temp_color); } storage.w = filter_area.z; diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 86d8eb64386..25b93c2ad74 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -82,6 +82,11 @@ public: device_ptr mean_ptr, device_ptr variance_ptr )> get_feature; + function<bool(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr + )> detect_outliers; function<bool(device_ptr*)> set_tiles; } functions; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index a458ca6bf64..27e196d1e68 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -411,6 +411,11 @@ protected: device_ptr mean_ptr, device_ptr variance_ptr, DenoisingTask *task); + bool denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task); bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index ae1a7b917c3..8f33014a00e 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea denoising_program = OpenCLProgram(this, "denoising", "filter.cl", ""); denoising_program.add_kernel(ustring("filter_divide_shadow")); denoising_program.add_kernel(ustring("filter_get_feature")); + denoising_program.add_kernel(ustring("filter_detect_outliers")); denoising_program.add_kernel(ustring("filter_combine_halves")); denoising_program.add_kernel(ustring("filter_construct_transform")); denoising_program.add_kernel(ustring("filter_nlm_calc_difference")); @@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset, return true; } +bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task) +{ + cl_mem image_mem = CL_MEM_PTR(image_ptr); + cl_mem variance_mem = CL_MEM_PTR(variance_ptr); + cl_mem depth_mem = CL_MEM_PTR(depth_ptr); + cl_mem output_mem = CL_MEM_PTR(output_ptr); + + cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers")); + + kernel_set_args(ckFilterDetectOutliers, 0, + image_mem, + variance_mem, + depth_mem, + output_mem, + task->rect, + task->buffer.pass_stride); + enqueue_kernel(ckFilterDetectOutliers, + task->rect.z-task->rect.x, + task->rect.w-task->rect.y); + + return true; +} + bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { @@ -942,6 +970,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task) denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.get_feature = function_bind(&OpenCLDeviceBase::denoising_get_feature, this, _1, _2, _3, _4, &denoising); + denoising.functions.detect_outliers = function_bind(&OpenCLDeviceBase::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.render_buffer.samples = rtile.sample; diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index f5a40d49997..41998c792b6 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -#define ccl_get_feature(buffer, pass) buffer[(pass)*pass_stride] +#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride] /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y). * pixel_buffer always points to the current pixel in the first pass. */ @@ -32,7 +32,7 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest { features[0] = pixel.x; features[1] = pixel.y; - features[2] = ccl_get_feature(buffer, 0); + features[2] = fabsf(ccl_get_feature(buffer, 0)); features[3] = ccl_get_feature(buffer, 1); features[4] = ccl_get_feature(buffer, 2); features[5] = ccl_get_feature(buffer, 3); @@ -50,7 +50,7 @@ ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float cc { scales[0] = fabsf(pixel.x - mean[0]); scales[1] = fabsf(pixel.y - mean[1]); - scales[2] = fabsf(ccl_get_feature(buffer, 0) - mean[2]); + scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]); scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3], ccl_get_feature(buffer, 2) - mean[4], ccl_get_feature(buffer, 3) - mean[5])); @@ -107,7 +107,7 @@ ccl_device_inline void filter_get_design_row_transform(int2 p_pixel, math_vector_zero(design_row+1, rank); design_row_add(design_row, rank, transform, stride, 0, q_pixel.x - p_pixel.x); design_row_add(design_row, rank, transform, stride, 1, q_pixel.y - p_pixel.y); - design_row_add(design_row, rank, transform, stride, 2, ccl_get_feature(q_buffer, 0) - ccl_get_feature(p_buffer, 0)); + design_row_add(design_row, rank, transform, stride, 2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0))); design_row_add(design_row, rank, transform, stride, 3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1)); design_row_add(design_row, rank, transform, stride, 4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2)); design_row_add(design_row, rank, transform, stride, 5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3)); diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h index 303c8f482e3..a242a8ed0a1 100644 --- a/intern/cycles/kernel/filter/filter_features_sse.h +++ b/intern/cycles/kernel/filter/filter_features_sse.h @@ -37,7 +37,7 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active { features[0] = x; features[1] = y; - features[2] = ccl_get_feature_sse(0); + features[2] = _mm_fabs_ps(ccl_get_feature_sse(0)); features[3] = ccl_get_feature_sse(1); features[4] = ccl_get_feature_sse(2); features[5] = ccl_get_feature_sse(3); @@ -58,7 +58,7 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128 scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels); scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels); - scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(0), mean[2])), active_pixels); + scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels); __m128 diff, scale; diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]); diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 54bcf888052..252bcc5e675 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -104,6 +104,57 @@ ccl_device void kernel_filter_get_feature(int sample, } } +ccl_device void kernel_filter_detect_outliers(int x, int y, + ccl_global float *image, + ccl_global float *variance, + ccl_global float *depth, + ccl_global float *out, + int4 rect, + int pass_stride) +{ + int buffer_w = align_up(rect.z - rect.x, 4); + + int n = 0; + float values[25]; + for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) { + for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) { + int idx = (y1-rect.y)*buffer_w + (x1-rect.x); + float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride])); + + /* Find the position of L. */ + int i; + for(i = 0; i < n; i++) { + if(values[i] > L) break; + } + /* Make space for L by shifting all following values to the right. */ + for(int j = n; j > i; j--) { + values[j] = values[j-1]; + } + /* Insert L. */ + values[i] = L; + n++; + } + } + + int idx = (y-rect.y)*buffer_w + (x-rect.x); + float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride])); + + float ref = 2.0f*values[(int)(n*0.75f)]; + float fac = 1.0f; + if(L > ref) { + /* If the pixel is an outlier, negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + fac = ref/L; + variance[idx ] *= fac*fac; + variance[idx + pass_stride] *= fac*fac; + variance[idx+2*pass_stride] *= fac*fac; + } + out[idx ] = fac*image[idx]; + out[idx + pass_stride] = fac*image[idx + pass_stride]; + out[idx+2*pass_stride] = fac*image[idx+2*pass_stride]; +} + /* Combine A/B buffers. * Calculates the combined mean and the buffer variance. */ ccl_device void kernel_filter_combine_halves(int x, int y, diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h index 02f3802fa0c..6a7c86e4012 100644 --- a/intern/cycles/kernel/filter/filter_reconstruction.h +++ b/intern/cycles/kernel/filter/filter_reconstruction.h @@ -54,7 +54,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride)); float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride)); - if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) { + /* If the pixel was flagged as an outlier during prefiltering, skip it. + * Otherwise, perform the regular confidence interval test. */ + if(ccl_get_feature(buffer + q_offset, 0) < 0.0f || + average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) { return; } diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index 10007ee2635..9708b4b5b58 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -43,6 +43,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, int buffer_denoising_offset, bool use_split_variance); +void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, + ccl_global float *image, + ccl_global float *variance, + ccl_global float *depth, + ccl_global float *output, + int *rect, + int pass_stride); + void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, float *mean, float *variance, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 3b71e50ca3b..15325abdccd 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -91,6 +91,21 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, #endif } +void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, + ccl_global float *image, + ccl_global float *variance, + ccl_global float *depth, + ccl_global float *output, + int *rect, + int pass_stride) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); +#else + kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride); +#endif +} + void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, float *mean, float *variance, diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 50f73f9728d..f812a6601c6 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -88,6 +88,22 @@ kernel_cuda_filter_get_feature(int sample, extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_filter_detect_outliers(float *image, + float *variance, + float *depth, + float *output, + int4 prefilter_rect, + int pass_stride) +{ + int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; + int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; + if(x < prefilter_rect.z && y < prefilter_rect.w) { + kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride); + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r) { int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 3d82bff9892..fbc3daa62b9 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -78,6 +78,20 @@ __kernel void kernel_ocl_filter_get_feature(int sample, } } +__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image, + ccl_global float *variance, + ccl_global float *depth, + ccl_global float *output, + int4 prefilter_rect, + int pass_stride) +{ + int x = prefilter_rect.x + get_global_id(0); + int y = prefilter_rect.y + get_global_id(1); + if(x < prefilter_rect.z && y < prefilter_rect.w) { + kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride); + } +} + __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean, ccl_global float *variance, ccl_global float *a, |