Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- intern/cycles/kernel/filter/filter_defines.h | 1
-rw-r--r-- intern/cycles/kernel/filter/filter_nlm_cpu.h | 19
-rw-r--r-- intern/cycles/kernel/filter/filter_nlm_gpu.h | 36
-rw-r--r-- intern/cycles/kernel/filter/filter_prefilter.h | 39
-rw-r--r-- intern/cycles/kernel/filter/filter_reconstruction.h | 12
-rw-r--r-- intern/cycles/kernel/kernel_types.h | 9
-rw-r--r-- intern/cycles/kernel/kernels/cpu/filter_cpu.h | 14
-rw-r--r-- intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 56
-rw-r--r-- intern/cycles/kernel/kernels/cuda/filter.cu | 36
-rw-r--r-- intern/cycles/kernel/kernels/opencl/filter.cl | 34
10 files changed, 220 insertions, 36 deletions
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
index 67f4e62ac0f..9ac7c3db23d 100644
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ b/intern/cycles/kernel/filter/filter_defines.h
@@ -27,6 +27,7 @@ typedef struct TileInfo {
int strides[9];
int x[4];
int y[4];
+ int from_render;
/* TODO(lukas): CUDA doesn't have uint64_t... */
#ifdef __KERNEL_OPENCL__
ccl_global float *buffers[9];
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index af73c0dadf2..0c4387af540 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -22,6 +22,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
const float *ccl_restrict weight_image,
const float *ccl_restrict variance_image,
+ const float *ccl_restrict scale_image,
float *difference_image,
int4 rect,
int stride,
@@ -41,13 +42,21 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
int idx_q = (y+dy)*stride + aligned_lowx + dx;
for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
float4 diff = make_float4(0.0f);
+ float4 scale_fac;
+ if(scale_image) {
+ scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
+ make_float4(0.25f), make_float4(4.0f));
+ }
+ else {
+ scale_fac = make_float4(1.0f);
+ }
for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
/* idx_p is guaranteed to be aligned, but idx_q isn't. */
float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
- float4 color_q = load4_u(weight_image, idx_q + chan_ofs);
+ float4 color_q = scale_fac*load4_u(weight_image, idx_q + chan_ofs);
float4 cdiff = color_p - color_q;
float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
- float4 var_q = load4_u(variance_image, idx_q + chan_ofs);
+ float4 var_q = sqr(scale_fac)*load4_u(variance_image, idx_q + chan_ofs);
diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q));
}
load4_a(difference_image, idx_p) = diff*channel_fac;
@@ -143,6 +152,7 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
float *out_image,
float *accum_image,
int4 rect,
+ int channel_offset,
int stride,
int f)
{
@@ -160,6 +170,11 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
load4_a(accum_image, idx_p) += mask(active, weight);
float4 val = load4_u(image, idx_q);
+ if(channel_offset) {
+ val += load4_u(image, idx_q + channel_offset);
+ val += load4_u(image, idx_q + 2*channel_offset);
+ val *= 1.0f/3.0f;
+ }
load4_a(out_image, idx_p) += mask(active, weight*val);
}
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 058afb34a92..d8e2e4d08aa 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -78,17 +78,25 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
int dx, int dy,
const ccl_global float *ccl_restrict weight_image,
const ccl_global float *ccl_restrict variance_image,
+ const ccl_global float *ccl_restrict scale_image,
ccl_global float *difference_image,
int4 rect, int stride,
int channel_offset,
float a, float k_2)
{
- float diff = 0.0f;
+ int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
int numChannels = channel_offset? 3 : 1;
- for(int c = 0; c < numChannels; c++) {
- float cdiff = weight_image[c*channel_offset + y*stride + x] - weight_image[c*channel_offset + (y+dy)*stride + (x+dx)];
- float pvar = variance_image[c*channel_offset + y*stride + x];
- float qvar = variance_image[c*channel_offset + (y+dy)*stride + (x+dx)];
+
+ float diff = 0.0f;
+ float scale_fac = 1.0f;
+ if(scale_image) {
+ scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
+ }
+
+ for(int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
+ float cdiff = weight_image[idx_p] - scale_fac*weight_image[idx_q];
+ float pvar = variance_image[idx_p];
+ float qvar = sqr(scale_fac)*variance_image[idx_q];
diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
}
if(numChannels > 1) {
@@ -133,7 +141,8 @@ ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
const ccl_global float *ccl_restrict image,
ccl_global float *out_image,
ccl_global float *accum_image,
- int4 rect, int stride, int f)
+ int4 rect, int channel_offset,
+ int stride, int f)
{
float sum = 0.0f;
const int low = max(rect.x, x-f);
@@ -142,12 +151,21 @@ ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
sum += difference_image[y*stride + x1];
}
sum *= 1.0f/(high-low);
+
+ int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
if(out_image) {
- atomic_add_and_fetch_float(accum_image + y*stride + x, sum);
- atomic_add_and_fetch_float(out_image + y*stride + x, sum*image[(y+dy)*stride + (x+dx)]);
+ atomic_add_and_fetch_float(accum_image + idx_p, sum);
+
+ float val = image[idx_q];
+ if(channel_offset) {
+ val += image[idx_q + channel_offset];
+ val += image[idx_q + 2*channel_offset];
+ val *= 1.0f/3.0f;
+ }
+ atomic_add_and_fetch_float(out_image + idx_p, sum*val);
}
else {
- accum_image[y*stride + x] = sum;
+ accum_image[idx_p] = sum;
}
}
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 3507f80df46..41be4dbea49 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -84,6 +84,7 @@ ccl_device void kernel_filter_get_feature(int sample,
int x, int y,
ccl_global float *mean,
ccl_global float *variance,
+ float scale,
int4 rect, int buffer_pass_stride,
int buffer_denoising_offset)
{
@@ -95,18 +96,38 @@ ccl_device void kernel_filter_get_feature(int sample,
int buffer_w = align_up(rect.z - rect.x, 4);
int idx = (y-rect.y)*buffer_w + (x - rect.x);
- mean[idx] = center_buffer[m_offset] / sample;
- if(sample > 1) {
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
- }
- else {
- /* Can't compute variance with single sample, just set it very high. */
- variance[idx] = 1e10f;
+ float val = scale * center_buffer[m_offset];
+ mean[idx] = val;
+
+ if(v_offset >= 0) {
+ if(sample > 1) {
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1)));
+ }
+ else {
+ /* Can't compute variance with single sample, just set it very high. */
+ variance[idx] = 1e10f;
+ }
}
}
+ccl_device void kernel_filter_write_feature(int sample, /* Copies one value of from into buffer for pixel (x, y); sample is not read in this body. */
+ int x, int y,
+ int4 buffer_params, /* Only .x, .y and .z are read here. */
+ ccl_global float *from,
+ ccl_global float *buffer,
+ int out_offset,
+ int4 rect) /* Only .x, .y and .z are read here. */
+{
+ ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; /* Destination entry for this pixel: (y*params.y + x + params.x) scaled by params.z. */
+
+ int buffer_w = align_up(rect.z - rect.x, 4); /* Row width used to index from: rect.z - rect.x passed through align_up(..., 4). */
+ int idx = (y-rect.y)*buffer_w + (x - rect.x); /* Index into from, relative to (rect.x, rect.y). */
+
+ combined_buffer[out_offset] = from[idx]; /* Single store at out_offset within the destination entry. */
+}
+
ccl_device void kernel_filter_detect_outliers(int x, int y,
ccl_global float *image,
ccl_global float *variance,
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 58740d5b06a..e5d3b0da835 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -108,11 +108,13 @@ ccl_device_inline void kernel_filter_finalize(int x, int y,
final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
- final_color *= sample;
- if(buffer_params.w) {
- final_color.x += combined_buffer[buffer_params.w+0];
- final_color.y += combined_buffer[buffer_params.w+1];
- final_color.z += combined_buffer[buffer_params.w+2];
+ if(buffer_params.w >= 0) {
+ final_color *= sample;
+ if(buffer_params.w > 0) {
+ final_color.x += combined_buffer[buffer_params.w+0];
+ final_color.y += combined_buffer[buffer_params.w+1];
+ final_color.z += combined_buffer[buffer_params.w+2];
+ }
}
combined_buffer[0] = final_color.x;
combined_buffer[1] = final_color.y;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 864aa7c470a..caa0057d997 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -472,8 +472,17 @@ typedef enum DenoisingPassOffsets {
DENOISING_PASS_COLOR_VAR = 23,
DENOISING_PASS_CLEAN = 26,
+ DENOISING_PASS_PREFILTERED_DEPTH = 0,
+ DENOISING_PASS_PREFILTERED_NORMAL = 1,
+ DENOISING_PASS_PREFILTERED_SHADOWING = 4,
+ DENOISING_PASS_PREFILTERED_ALBEDO = 5,
+ DENOISING_PASS_PREFILTERED_COLOR = 8,
+ DENOISING_PASS_PREFILTERED_VARIANCE = 11,
+ DENOISING_PASS_PREFILTERED_INTENSITY = 14,
+
DENOISING_PASS_SIZE_BASE = 26,
DENOISING_PASS_SIZE_CLEAN = 3,
+ DENOISING_PASS_SIZE_PREFILTERED = 15,
} DenoisingPassOffsets;
typedef enum eBakePassFilter {
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index e036b53b810..08333c7a455 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -37,10 +37,20 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int y,
float *mean,
float *variance,
+ float scale,
int* prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset);
+void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
+ int x,
+ int y,
+ int *buffer_params,
+ float *from,
+ float *buffer,
+ int out_offset,
+ int* prefilter_rect);
+
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
ccl_global float *variance,
@@ -71,7 +81,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
int dy,
float *weight_image,
- float *variance,
+ float *variance_image,
+ float *scale_image,
float *difference_image,
int* rect,
int stride,
@@ -99,6 +110,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
float *out_image,
float *accum_image,
int* rect,
+ int channel_offset,
int stride,
int f);
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 4c758711481..b792367e3ab 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -69,6 +69,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int x,
int y,
float *mean, float *variance,
+ float scale,
int* prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
@@ -80,12 +81,29 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
m_offset, v_offset,
x, y,
mean, variance,
+ scale,
load_int4(prefilter_rect),
buffer_pass_stride,
buffer_denoising_offset);
#endif
}
+void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, /* Wrapper that forwards its arguments to kernel_filter_write_feature unless KERNEL_STUB is defined. */
+ int x,
+ int y,
+ int *buffer_params, /* Converted with load_int4 before the call. */
+ float *from,
+ float *buffer,
+ int out_offset,
+ int* prefilter_rect) /* Converted with load_int4 before the call. */
+{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, filter_write_feature); /* With KERNEL_STUB defined, the kernel function is not called. */
+#else
+ kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); /* Remaining arguments are passed through unchanged. */
+#endif
+}
+
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
ccl_global float *variance,
@@ -130,8 +148,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
#else
- rank += storage_ofs;
- transform += storage_ofs*TRANSFORM_SIZE;
+ rank += storage_ofs;
+ transform += storage_ofs*TRANSFORM_SIZE;
kernel_filter_construct_transform(buffer,
x, y,
load_int4(prefilter_rect),
@@ -146,7 +164,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
int dy,
float *weight_image,
- float *variance,
+ float *variance_image,
+ float *scale_image,
float *difference_image,
int *rect,
int stride,
@@ -157,7 +176,15 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
#else
- kernel_filter_nlm_calc_difference(dx, dy, weight_image, variance, difference_image, load_int4(rect), stride, channel_offset, a, k_2);
+ kernel_filter_nlm_calc_difference(dx, dy,
+ weight_image,
+ variance_image,
+ scale_image,
+ difference_image,
+ load_int4(rect),
+ stride,
+ channel_offset,
+ a, k_2);
#endif
}
@@ -195,13 +222,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
float *out_image,
float *accum_image,
int *rect,
+ int channel_offset,
int stride,
int f)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
#else
- kernel_filter_nlm_update_output(dx, dy, difference_image, image, temp_image, out_image, accum_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_update_output(dx, dy,
+ difference_image,
+ image,
+ temp_image,
+ out_image,
+ accum_image,
+ load_int4(rect),
+ channel_offset,
+ stride, f);
#endif
}
@@ -222,7 +258,15 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_window), stride, f, pass_stride);
+ kernel_filter_nlm_construct_gramian(dx, dy,
+ difference_image,
+ buffer,
+ transform, rank,
+ XtWX, XtWY,
+ load_int4(rect),
+ load_int4(filter_window),
+ stride, f,
+ pass_stride);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index b856cbde45c..3b51bb41aed 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -64,6 +64,7 @@ kernel_cuda_filter_get_feature(int sample,
int v_offset,
float *mean,
float *variance,
+ float scale,
int4 prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
@@ -76,6 +77,7 @@ kernel_cuda_filter_get_feature(int sample,
m_offset, v_offset,
x, y,
mean, variance,
+ scale,
prefilter_rect,
buffer_pass_stride,
buffer_denoising_offset);
@@ -84,6 +86,30 @@ kernel_cuda_filter_get_feature(int sample,
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_write_feature(int sample, /* CUDA entry point: each thread calls kernel_filter_write_feature at most once. */
+ int4 buffer_params,
+ int4 filter_area, /* .x/.y are added to the thread coordinates; .z/.w bound them. */
+ float *from,
+ float *buffer,
+ int out_offset,
+ int4 prefilter_rect)
+{
+ int x = blockDim.x*blockIdx.x + threadIdx.x; /* Thread coordinates derived from block and thread indices. */
+ int y = blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < filter_area.z && y < filter_area.w) { /* Threads at or beyond filter_area.z / filter_area.w do nothing. */
+ kernel_filter_write_feature(sample,
+ x + filter_area.x, /* Coordinates are offset by filter_area.x / .y before the call. */
+ y + filter_area.y,
+ buffer_params,
+ from,
+ buffer,
+ out_offset,
+ prefilter_rect);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_detect_outliers(float *image,
float *variance,
float *depth,
@@ -136,6 +162,7 @@ extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
const float *ccl_restrict variance_image,
+ const float *ccl_restrict scale_image,
float *difference_image,
int w,
int h,
@@ -152,9 +179,11 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
+ scale_image,
difference_image + ofs,
rect, stride,
- channel_offset, a, k_2);
+ channel_offset,
+ a, k_2);
}
}
@@ -210,6 +239,7 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
int h,
int stride,
int pass_stride,
+ int channel_offset,
int r,
int f)
{
@@ -221,7 +251,9 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
image,
out_image,
accum_image,
- rect, stride, f);
+ rect,
+ channel_offset,
+ stride, f);
}
}
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index a550f97f4eb..8a821ee281d 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -56,6 +56,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
int v_offset,
ccl_global float *mean,
ccl_global float *variance,
+ float scale,
int4 prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
@@ -68,12 +69,35 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
m_offset, v_offset,
x, y,
mean, variance,
+ scale,
prefilter_rect,
buffer_pass_stride,
buffer_denoising_offset);
}
}
+__kernel void kernel_ocl_filter_write_feature(int sample, /* OpenCL entry point: each work-item calls kernel_filter_write_feature at most once. */
+ int4 buffer_params,
+ int4 filter_area, /* .x/.y are added to the work-item coordinates; .z/.w bound them. */
+ ccl_global float *from,
+ ccl_global float *buffer,
+ int out_offset,
+ int4 prefilter_rect)
+{
+ int x = get_global_id(0); /* Work-item coordinates in dimensions 0 and 1. */
+ int y = get_global_id(1);
+ if(x < filter_area.z && y < filter_area.w) { /* Work-items at or beyond filter_area.z / filter_area.w do nothing. */
+ kernel_filter_write_feature(sample,
+ x + filter_area.x, /* Coordinates are offset by filter_area.x / .y before the call. */
+ y + filter_area.y,
+ buffer_params,
+ from,
+ buffer,
+ out_offset,
+ prefilter_rect);
+ }
+}
+
__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -128,6 +152,7 @@ __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_
__kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_restrict weight_image,
const ccl_global float *ccl_restrict variance_image,
+ const ccl_global float *ccl_restrict scale_image,
ccl_global float *difference_image,
int w,
int h,
@@ -144,9 +169,11 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
+ scale_image,
difference_image + ofs,
rect, stride,
- channel_offset, a, k_2);
+ channel_offset,
+ a, k_2);
}
}
@@ -196,6 +223,7 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re
int h,
int stride,
int pass_stride,
+ int channel_offset,
int r,
int f)
{
@@ -207,7 +235,9 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re
image,
out_image,
accum_image,
- rect, stride, f);
+ rect,
+ channel_offset,
+ stride, f);
}
}