1 files changed, 0 insertions, 254 deletions
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
deleted file mode 100644
index 24200c29203..00000000000
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
-#define load4_u(buf, ofs) load_float4((buf) + (ofs))
-
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx,
-                                                         int dy,
-                                                         const float *ccl_restrict weight_image,
-                                                         const float *ccl_restrict variance_image,
-                                                         const float *ccl_restrict scale_image,
-                                                         float *difference_image,
-                                                         int4 rect,
-                                                         int stride,
-                                                         int channel_offset,
-                                                         int frame_offset,
-                                                         float a,
-                                                         float k_2)
-{
-  /* Strides need to be aligned to 16 bytes. */
-  kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
-  int aligned_lowx = rect.x & (~3);
-  const int numChannels = (channel_offset > 0) ? 3 : 1;
-  const float4 channel_fac = make_float4(1.0f / numChannels);
-
-  for (int y = rect.y; y < rect.w; y++) {
-    int idx_p = y * stride + aligned_lowx;
-    int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
-    for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
-      float4 diff = make_float4(0.0f);
-      float4 scale_fac;
-      if (scale_image) {
-        scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
-                          make_float4(0.25f),
-                          make_float4(4.0f));
-      }
-      else {
-        scale_fac = make_float4(1.0f);
-      }
-      for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
-        /* idx_p is guaranteed to be aligned, but idx_q isn't. */
-        float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
-        float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
-        float4 cdiff = color_p - color_q;
-        float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
-        float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
-        diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
-                (make_float4(1e-8f) + k_2 * (var_p + var_q));
-      }
-      load4_a(difference_image, idx_p) = diff * channel_fac;
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_blur(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    const int low = max(rect.y, y - f);
-    const int high = min(rect.w, y + f + 1);
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = make_float4(0.0f);
-    }
-    for (int y1 = low; y1 < high; y1++) {
-      for (int x = aligned_lowx; x < rect.z; x += 4) {
-        load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
-      }
-    }
-    float fac = 1.0f / (high - low);
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) *= fac;
-    }
-  }
-}
-
-ccl_device_inline void nlm_blur_horizontal(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = make_float4(0.0f);
-    }
-  }
-
-  for (int dx = -f; dx <= f; dx++) {
-    aligned_lowx = round_down(rect.x - min(0, dx), 4);
-    int highx = rect.z - max(0, dx);
-    int4 lowx4 = make_int4(rect.x - min(0, dx));
-    int4 highx4 = make_int4(rect.z - max(0, dx));
-    for (int y = rect.y; y < rect.w; y++) {
-      for (int x = aligned_lowx; x < highx; x += 4) {
-        int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-        int4 active = (x4 >= lowx4) & (x4 < highx4);
-
-        float4 diff = load4_u(difference_image, y * stride + x + dx);
-        load4_a(out_image, y * stride + x) += mask(active, diff);
-      }
-    }
-  }
-
-  aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
-      float4 low = max(make_float4(rect.x), x4 - make_float4(f));
-      float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
-      load4_a(out_image, y * stride + x) *= rcp(high - low);
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_weight(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
-  nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = fast_expf4(
-          -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_update_output(int dx,
-                                                       int dy,
-                                                       const float *ccl_restrict difference_image,
-                                                       const float *ccl_restrict image,
-                                                       float *temp_image,
-                                                       float *out_image,
-                                                       float *accum_image,
-                                                       int4 rect,
-                                                       int channel_offset,
-                                                       int stride,
-                                                       int f)
-{
-  nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
-
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-      int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
-
-      int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
-
-      float4 weight = load4_a(temp_image, idx_p);
-      load4_a(accum_image, idx_p) += mask(active, weight);
-
-      float4 val = load4_u(image, idx_q);
-      if (channel_offset) {
-        val += load4_u(image, idx_q + channel_offset);
-        val += load4_u(image, idx_q + 2 * channel_offset);
-        val *= 1.0f / 3.0f;
-      }
-
-      load4_a(out_image, idx_p) += mask(active, weight * val);
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
-                                                           int dy,
-                                                           int t,
-                                                           const float *ccl_restrict
-                                                               difference_image,
-                                                           const float *ccl_restrict buffer,
-                                                           float *transform,
-                                                           int *rank,
-                                                           float *XtWX,
-                                                           float3 *XtWY,
-                                                           int4 rect,
-                                                           int4 filter_window,
-                                                           int stride,
-                                                           int f,
-                                                           int pass_stride,
-                                                           int frame_offset,
-                                                           bool use_time)
-{
-  int4 clip_area = rect_clip(rect, filter_window);
-  /* fy and fy are in filter-window-relative coordinates,
-   * while x and y are in feature-window-relative coordinates. */
-  for (int y = clip_area.y; y < clip_area.w; y++) {
-    for (int x = clip_area.x; x < clip_area.z; x++) {
-      const int low = max(rect.x, x - f);
-      const int high = min(rect.z, x + f + 1);
-      float sum = 0.0f;
-      for (int x1 = low; x1 < high; x1++) {
-        sum += difference_image[y * stride + x1];
-      }
-      float weight = sum * (1.0f / (high - low));
-
-      int storage_ofs = coord_to_local_index(filter_window, x, y);
-      float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
-      float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
-      float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
-      int *l_rank = rank + storage_ofs;
-
-      kernel_filter_construct_gramian(x,
-                                      y,
-                                      1,
-                                      dx,
-                                      dy,
-                                      t,
-                                      stride,
-                                      pass_stride,
-                                      frame_offset,
-                                      use_time,
-                                      buffer,
-                                      l_transform,
-                                      l_rank,
-                                      weight,
-                                      l_XtWX,
-                                      l_XtWY,
-                                      0);
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
-                                                   const float *ccl_restrict accum_image,
-                                                   int4 rect,
-                                                   int w)
-{
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = rect.x; x < rect.z; x++) {
-      out_image[y * w + x] /= accum_image[y * w + x];
-    }
-  }
-}
-
-#undef load4_a
-#undef load4_u
-
-CCL_NAMESPACE_END