diff options
Diffstat (limited to 'intern/cycles/kernel/filter/filter_nlm_gpu.h')
-rw-r--r-- | intern/cycles/kernel/filter/filter_nlm_gpu.h | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h deleted file mode 100644 index 650c743f34f..00000000000 --- a/intern/cycles/kernel/filter/filter_nlm_gpu.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Determines pixel coordinates and offset for the current thread. - * Returns whether the thread should do any work. - * - * All coordinates are relative to the denoising buffer! - * - * Window is the rect that should be processed. - * co is filled with (x, y, dx, dy). - */ -ccl_device_inline bool get_nlm_coords_window( - int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window) -{ - /* Determine the pixel offset that this thread should apply. */ - int s = 2 * r + 1; - int si = ccl_global_id(1); - int sx = si % s; - int sy = si / s; - if (sy >= s) { - return false; - } - - /* Pixels still need to lie inside the denoising buffer after applying the offset, - * so determine the area for which this is the case. */ - int dx = sx - r; - int dy = sy - r; - - *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy)); - - /* Find the intersection of the area that we want to process (window) and the area - * that can be processed (rect) to get the final area for this offset. */ - int4 clip_area = rect_clip(window, *rect); - - /* If the radius is larger than one of the sides of the window, - * there will be shifts for which there is no usable pixel at all. */ - if (!rect_is_valid(clip_area)) { - return false; - } - - /* Map the linear thread index to pixels inside the clip area. */ - int x, y; - if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) { - return false; - } - - *co = make_int4(x, y, dx, dy); - - *ofs = (sy * s + sx) * stride; - - return true; -} - -ccl_device_inline bool get_nlm_coords( - int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs) -{ - return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h)); -} - -ccl_device_inline void kernel_filter_nlm_calc_difference( - int x, - int y, - int dx, - int dy, - const ccl_global float *ccl_restrict weight_image, - const ccl_global float *ccl_restrict variance_image, - const ccl_global float *ccl_restrict scale_image, - ccl_global float *difference_image, - int4 rect, - int stride, - int channel_offset, - int frame_offset, - float a, - float k_2) -{ - int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset; - int numChannels = channel_offset ? 3 : 1; - - float diff = 0.0f; - float scale_fac = 1.0f; - if (scale_image) { - scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f); - } - - for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) { - float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q]; - float pvar = variance_image[idx_p]; - float qvar = sqr(scale_fac) * variance_image[idx_q]; - diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar)); - } - if (numChannels > 1) { - diff *= 1.0f / numChannels; - } - difference_image[y * stride + x] = diff; -} - -ccl_device_inline void kernel_filter_nlm_blur(int x, - int y, - const ccl_global float *ccl_restrict - difference_image, - ccl_global float *out_image, - int4 rect, - int stride, - int f) -{ - float sum = 0.0f; - const int low = max(rect.y, y - f); - const int high = min(rect.w, y + f + 1); - for (int y1 = low; y1 < high; y1++) { - sum += difference_image[y1 * stride + x]; - } - sum *= 1.0f / (high - low); - out_image[y * stride + x] = sum; -} - -ccl_device_inline void kernel_filter_nlm_calc_weight(int x, - int y, - const ccl_global float *ccl_restrict - difference_image, - ccl_global float *out_image, - int4 rect, - int stride, - int f) -{ - float sum = 0.0f; - const int low = max(rect.x, x - f); - const int high = min(rect.z, x + f + 1); - for (int x1 = low; x1 < high; x1++) { - sum += difference_image[y * stride + x1]; - } - sum *= 1.0f / (high - low); - out_image[y * stride + x] = fast_expf(-max(sum, 0.0f)); -} - -ccl_device_inline void kernel_filter_nlm_update_output(int x, - int y, - int dx, - int dy, - const ccl_global float *ccl_restrict - difference_image, - const ccl_global float *ccl_restrict image, - ccl_global float *out_image, - ccl_global float *accum_image, - int4 rect, - int channel_offset, - int stride, - int f) -{ - float sum = 0.0f; - const int low = max(rect.x, x - f); - const int high = min(rect.z, x + f + 1); - for (int x1 = low; x1 < high; x1++) { - sum += difference_image[y * stride + x1]; - } - sum *= 1.0f / (high - low); - - int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx); - if (out_image) { - atomic_add_and_fetch_float(accum_image + idx_p, sum); - - float val = image[idx_q]; - if (channel_offset) { - val += image[idx_q + channel_offset]; - val += image[idx_q + 2 * channel_offset]; - val *= 1.0f / 3.0f; - } - atomic_add_and_fetch_float(out_image + idx_p, sum * val); - } - else { - accum_image[idx_p] = sum; - } -} - -ccl_device_inline void kernel_filter_nlm_construct_gramian( - int x, - int y, - int dx, - int dy, - int t, - const ccl_global float *ccl_restrict difference_image, - const ccl_global float *ccl_restrict buffer, - const ccl_global float *ccl_restrict transform, - ccl_global int *rank, - ccl_global float *XtWX, - ccl_global float3 *XtWY, - int4 rect, - int4 filter_window, - int stride, - int f, - int pass_stride, - int frame_offset, - bool use_time, - int localIdx) -{ - const int low = max(rect.x, x - f); - const int high = min(rect.z, x + f + 1); - float sum = 0.0f; - for (int x1 = low; x1 < high; x1++) { - sum += difference_image[y * stride + x1]; - } - float weight = sum * (1.0f / (high - low)); - - /* Reconstruction data is only stored for pixels inside the filter window, - * so compute the pixels's index in there. */ - int storage_ofs = coord_to_local_index(filter_window, x, y); - transform += storage_ofs; - rank += storage_ofs; - XtWX += storage_ofs; - XtWY += storage_ofs; - - kernel_filter_construct_gramian(x, - y, - rect_size(filter_window), - dx, - dy, - t, - stride, - pass_stride, - frame_offset, - use_time, - buffer, - transform, - rank, - weight, - XtWX, - XtWY, - localIdx); -} - -ccl_device_inline void kernel_filter_nlm_normalize(int x, - int y, - ccl_global float *out_image, - const ccl_global float *ccl_restrict - accum_image, - int stride) -{ - out_image[y * stride + x] /= accum_image[y * stride + x]; -} - -CCL_NAMESPACE_END |