Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/filter/filter_nlm_gpu.h')
-rw-r--r-- intern/cycles/kernel/filter/filter_nlm_gpu.h 36
1 file changed, 27 insertions, 9 deletions
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 058afb34a92..d8e2e4d08aa 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -78,17 +78,25 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
int dx, int dy,
const ccl_global float *ccl_restrict weight_image,
const ccl_global float *ccl_restrict variance_image,
+ const ccl_global float *ccl_restrict scale_image,
ccl_global float *difference_image,
int4 rect, int stride,
int channel_offset,
float a, float k_2)
{
- float diff = 0.0f;
+ int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
int numChannels = channel_offset? 3 : 1;
- for(int c = 0; c < numChannels; c++) {
- float cdiff = weight_image[c*channel_offset + y*stride + x] - weight_image[c*channel_offset + (y+dy)*stride + (x+dx)];
- float pvar = variance_image[c*channel_offset + y*stride + x];
- float qvar = variance_image[c*channel_offset + (y+dy)*stride + (x+dx)];
+
+ float diff = 0.0f;
+ float scale_fac = 1.0f;
+ if(scale_image) {
+ scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
+ }
+
+ for(int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
+ float cdiff = weight_image[idx_p] - scale_fac*weight_image[idx_q];
+ float pvar = variance_image[idx_p];
+ float qvar = sqr(scale_fac)*variance_image[idx_q];
diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
}
if(numChannels > 1) {
@@ -133,7 +141,8 @@ ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
const ccl_global float *ccl_restrict image,
ccl_global float *out_image,
ccl_global float *accum_image,
- int4 rect, int stride, int f)
+ int4 rect, int channel_offset,
+ int stride, int f)
{
float sum = 0.0f;
const int low = max(rect.x, x-f);
@@ -142,12 +151,21 @@ ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
sum += difference_image[y*stride + x1];
}
sum *= 1.0f/(high-low);
+
+ int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
if(out_image) {
- atomic_add_and_fetch_float(accum_image + y*stride + x, sum);
- atomic_add_and_fetch_float(out_image + y*stride + x, sum*image[(y+dy)*stride + (x+dx)]);
+ atomic_add_and_fetch_float(accum_image + idx_p, sum);
+
+ float val = image[idx_q];
+ if(channel_offset) {
+ val += image[idx_q + channel_offset];
+ val += image[idx_q + 2*channel_offset];
+ val *= 1.0f/3.0f;
+ }
+ atomic_add_and_fetch_float(out_image + idx_p, sum*val);
}
else {
- accum_image[y*stride + x] = sum;
+ accum_image[idx_p] = sum;
}
}