12 files changed, 0 insertions, 1778 deletions
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
deleted file mode 100644
index b067e53a8bf..00000000000
--- a/intern/cycles/kernel/filter/filter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FILTER_H__
-#define __FILTER_H__
-
-/* CPU Filter Kernel Interface */
-
-#include "util/util_types.h"
-
-#include "kernel/filter/filter_defines.h"
-
-CCL_NAMESPACE_BEGIN
-
-#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z /* Token-pastes into x_y_z. */
-#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) /* Expands arch first. */
-#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) /* kernel_<arch>_<name> */
-
-#define KERNEL_ARCH cpu /* The same header is included once per KERNEL_ARCH value below. */
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse2 /* NOTE(review): no #undef here; presumably filter_cpu.h does it. */
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-CCL_NAMESPACE_END
-
-#endif /* __FILTER_H__ */
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
deleted file mode 100644
index 1c0ac5e2cb7..00000000000
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FILTER_DEFINES_H__
-#define __FILTER_DEFINES_H__
-
-#define DENOISE_FEATURES 11 /* The feature code fills 10 entries, or 11 when use_time is set. */
-#define TRANSFORM_SIZE (DENOISE_FEATURES * DENOISE_FEATURES) /* Entries per pixel transform. */
-#define XTWX_SIZE (((DENOISE_FEATURES + 1) * (DENOISE_FEATURES + 2)) / 2) /* Triangular number. */
-#define XTWY_SIZE (DENOISE_FEATURES + 1) /* One entry per feature plus one. */
-
-#define DENOISE_MAX_FRAMES 16 /* Capacity of TileInfo::frames. */
-
-typedef struct TileInfo {
-  int offsets[9]; /* NOTE(review): nine slots like buffers[]; exact meaning not shown here. */
-  int strides[9]; /* NOTE(review): presumably one stride per buffer - confirm. */
-  int x[4]; /* NOTE(review): unused in the code visible here; presumably tile boundaries. */
-  int y[4];
-  int from_render;
-  int frames[DENOISE_MAX_FRAMES]; /* Per-frame values; the pixel loops copy them to pixel.z. */
-  int num_frames; /* Number of frames[] entries the pixel loops iterate over. */
-  /* TODO(lukas): CUDA doesn't have uint64_t... */
-#ifdef __KERNEL_OPENCL__
-  ccl_global float *buffers[9];
-#else
-  long long int buffers[9]; /* Read through ccl_get_tile_buffer() outside OpenCL. */
-#endif
-} TileInfo;
-
-#ifdef __KERNEL_OPENCL__ /* OpenCL: the nine tile buffers travel as separate kernel arguments. */
-#  define CCL_FILTER_TILE_INFO \
-    ccl_global TileInfo *tile_info, ccl_global float *tile_buffer_1, \
-        ccl_global float *tile_buffer_2, ccl_global float *tile_buffer_3, \
-        ccl_global float *tile_buffer_4, ccl_global float *tile_buffer_5, \
-        ccl_global float *tile_buffer_6, ccl_global float *tile_buffer_7, \
-        ccl_global float *tile_buffer_8, ccl_global float *tile_buffer_9
-#  define CCL_FILTER_TILE_INFO_ARG \
-    tile_info, tile_buffer_1, tile_buffer_2, tile_buffer_3, tile_buffer_4, tile_buffer_5, \
-        tile_buffer_6, tile_buffer_7, tile_buffer_8, tile_buffer_9
-#  define ccl_get_tile_buffer(id) \
-    ((id) == 0 ? tile_buffer_1 : \
-     (id) == 1 ? tile_buffer_2 : \
-     (id) == 2 ? tile_buffer_3 : \
-     (id) == 3 ? tile_buffer_4 : \
-     (id) == 4 ? tile_buffer_5 : \
-     (id) == 5 ? tile_buffer_6 : \
-     (id) == 6 ? tile_buffer_7 : \
-     (id) == 7 ? tile_buffer_8 : \
-                 tile_buffer_9) /* id is parenthesized so any argument expression is safe. */
-#else /* CUDA and CPU: buffers are looked up in tile_info->buffers instead. */
-#  ifdef __KERNEL_CUDA__
-#    define CCL_FILTER_TILE_INFO ccl_global TileInfo *tile_info
-#  else
-#    define CCL_FILTER_TILE_INFO TileInfo *tile_info
-#  endif
-#  define ccl_get_tile_buffer(id) (tile_info->buffers[id]) /* Needs tile_info in scope. */
-#endif
-
-#endif /* __FILTER_DEFINES_H__*/
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
deleted file mode 100644
index 8a2af957146..00000000000
--- a/intern/cycles/kernel/filter/filter_features.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride] /* Needs pass_stride in scope. */
-
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y). pixel_buffer always
- * points to the current pixel in the first pass. Repeat the loop for every frame listed in
- * tile_info->frames; pixel.z holds that frame's value. Close with END_FOR_PIXEL_WINDOW. */
-#define FOR_PIXEL_WINDOW \
-  for (int frame = 0; frame < tile_info->num_frames; frame++) { \
-    pixel.z = tile_info->frames[frame]; \
-    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
-                   frame * frame_stride; \
-    for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
-      for (pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
-
-#define END_FOR_PIXEL_WINDOW \
-  } \
-  pixel_buffer += buffer_w - (high.x - low.x); \
-  } \
-  } /* Closes the x, y and frame loops; the line above steps pixel_buffer to the next row. */
-
-ccl_device_inline void filter_get_features(int3 pixel, /* x, y, and the frame value in z. */
-                                           const ccl_global float *ccl_restrict buffer,
-                                           float *features, /* Out: 10 floats, 11 if use_time. */
-                                           bool use_time, /* Store pixel.z as feature 10. */
-                                           const float *ccl_restrict mean, /* May be NULL. */
-                                           int pass_stride) /* Used by ccl_get_feature(). */
-{ /* Gathers the feature vector of one pixel and, if mean is given, subtracts it. */
-  features[0] = pixel.x;
-  features[1] = pixel.y;
-  features[2] = fabsf(ccl_get_feature(buffer, 0)); /* Pass 0 contributes its magnitude only. */
-  features[3] = ccl_get_feature(buffer, 1);
-  features[4] = ccl_get_feature(buffer, 2);
-  features[5] = ccl_get_feature(buffer, 3);
-  features[6] = ccl_get_feature(buffer, 4);
-  features[7] = ccl_get_feature(buffer, 5);
-  features[8] = ccl_get_feature(buffer, 6);
-  features[9] = ccl_get_feature(buffer, 7);
-  if (use_time) {
-    features[10] = pixel.z;
-  }
-  if (mean) {
-    for (int i = 0; i < (use_time ? 11 : 10); i++) { /* Same count as was written above. */
-      features[i] -= mean[i];
-    }
-  }
-}
-
-ccl_device_inline void filter_get_feature_scales(int3 pixel,
-                                                 const ccl_global float *ccl_restrict buffer,
-                                                 float *scales, /* Out: 6 floats, 7 if use_time. */
-                                                 bool use_time,
-                                                 const float *ccl_restrict mean, /* Required. */
-                                                 int pass_stride)
-{ /* Distance of one pixel's features from mean; mean is indexed like the feature vector. */
-  scales[0] = fabsf(pixel.x - mean[0]);
-  scales[1] = fabsf(pixel.y - mean[1]);
-  scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
-  scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3], /* Passes 1-3. */
-                                      ccl_get_feature(buffer, 2) - mean[4],
-                                      ccl_get_feature(buffer, 3) - mean[5]));
-  scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
-  scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7], /* Passes 5-7. */
-                                      ccl_get_feature(buffer, 6) - mean[8],
-                                      ccl_get_feature(buffer, 7) - mean[9]));
-  if (use_time) {
-    scales[6] = fabsf(pixel.z - mean[10]);
-  }
-}
-
-ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
-{ /* Expands 6-7 grouped magnitudes in place into 10-11 per-feature reciprocals. */
-  scale[0] = 1.0f / max(scale[0], 0.01f); /* The 0.01 floor bounds each reciprocal by 100. */
-  scale[1] = 1.0f / max(scale[1], 0.01f);
-  scale[2] = 1.0f / max(scale[2], 0.01f);
-  if (use_time) {
-    scale[10] = 1.0f / max(scale[6], 0.01f); /* Must read slot 6 before it is overwritten. */
-  }
-  scale[6] = 1.0f / max(scale[4], 0.01f);
-  scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f); /* Slot 5 is squared. */
-  scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f); /* Last: clobbers 4, 5. */
-}
-
-ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
-                                          int pass_stride) /* Used by ccl_get_feature(). */
-{ /* Returns passes 8, 9 and 10 of the pixel at buffer as one float3. */
-  return make_float3(
-      ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
-}
-
-ccl_device_inline void design_row_add(float *design_row, /* In/out; entry 0 is not touched. */
-                                      int rank, /* Number of entries accumulated. */
-                                      const ccl_global float *ccl_restrict transform,
-                                      int stride, /* Spacing between transform elements. */
-                                      int row, /* Transform row to read. */
-                                      float feature, /* Scalar the row is multiplied by. */
-                                      int transform_row_stride) /* Elements per transform row. */
-{ /* Adds feature times the first rank entries of one transform row to design_row[1..rank]. */
-  for (int i = 0; i < rank; i++) {
-    design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature;
-  }
-}
-
-/* Fill the design row: a leading 1, then the q - p feature deltas projected by transform. */
-ccl_device_inline void filter_get_design_row_transform(
-    int3 p_pixel,
-    const ccl_global float *ccl_restrict p_buffer,
-    int3 q_pixel,
-    const ccl_global float *ccl_restrict q_buffer,
-    int pass_stride,
-    int rank, /* Entries written after design_row[0]. */
-    float *design_row, /* Out: rank + 1 floats. */
-    const ccl_global float *ccl_restrict transform,
-    int stride,
-    bool use_time)
-{
-  int num_features = use_time ? 11 : 10; /* Passed on as the transform's row length. */
-
-  design_row[0] = 1.0f;
-  math_vector_zero(design_row + 1, rank);
-
-#define DESIGN_ROW_ADD(I, F) \
-  design_row_add(design_row, rank, transform, stride, I, F, num_features);
-  DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
-  DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
-  DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
-  DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
-  DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
-  DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
-  DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
-  DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
-  DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
-  DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
-  if (use_time) {
-    DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z) /* The macro already ends in a semicolon. */
-  }
-#undef DESIGN_ROW_ADD
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
deleted file mode 100644
index 59d4ace2bef..00000000000
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define ccl_get_feature_sse(pass) load_float4(buffer + (pass)*pass_stride) /* Uses caller's buffer. */
-
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
- * pixel_buffer always points to the first of the 4 current pixels in the first pass.
- * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set
- * for all pixels within the window. Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW_SSE \
-  for (int frame = 0; frame < tile_info->num_frames; frame++) { \
-    pixel.z = tile_info->frames[frame]; \
-    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
-                   frame * frame_stride; \
-    float4 t4 = make_float4(pixel.z); \
-    for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
-      float4 y4 = make_float4(pixel.y); \
-      for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
-        float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
-        int4 active_pixels = x4 < make_float4(high.x);
-
-#define END_FOR_PIXEL_WINDOW_SSE \
-  } \
-  pixel_buffer += buffer_w - (pixel.x - low.x); \
-  } \
-  } /* The row advance uses pixel.x because the x loop can step up to 3 pixels past high.x. */
-
-ccl_device_inline void filter_get_features_sse(float4 x, /* Per-lane pixel coordinates. */
-                                               float4 y,
-                                               float4 t, /* Per-lane frame value. */
-                                               int4 active_pixels, /* Lanes inside the window. */
-                                               const float *ccl_restrict buffer,
-                                               float4 *features, /* Out: 10 or 11 float4s. */
-                                               bool use_time,
-                                               const float4 *ccl_restrict mean, /* May be NULL. */
-                                               int pass_stride)
-{ /* Four-pixel version of filter_get_features(): one float4 per feature. */
-  int num_features = use_time ? 11 : 10;
-
-  features[0] = x;
-  features[1] = y;
-  features[2] = fabs(ccl_get_feature_sse(0)); /* Pass 0 contributes its magnitude only. */
-  features[3] = ccl_get_feature_sse(1);
-  features[4] = ccl_get_feature_sse(2);
-  features[5] = ccl_get_feature_sse(3);
-  features[6] = ccl_get_feature_sse(4);
-  features[7] = ccl_get_feature_sse(5);
-  features[8] = ccl_get_feature_sse(6);
-  features[9] = ccl_get_feature_sse(7);
-  if (use_time) {
-    features[10] = t;
-  }
-
-  if (mean) {
-    for (int i = 0; i < num_features; i++) {
-      features[i] = features[i] - mean[i];
-    }
-  }
-  for (int i = 0; i < num_features; i++) {
-    features[i] = mask(active_pixels, features[i]); /* Presumably clears inactive lanes. */
-  }
-}
-
-ccl_device_inline void filter_get_feature_scales_sse(float4 x,
-                                                     float4 y,
-                                                     float4 t,
-                                                     int4 active_pixels,
-                                                     const float *ccl_restrict buffer,
-                                                     float4 *scales, /* Out: 6 or 7 float4s. */
-                                                     bool use_time,
-                                                     const float4 *ccl_restrict mean, /* Required. */
-                                                     int pass_stride)
-{ /* Four-pixel version of filter_get_feature_scales(); mean is always dereferenced. */
-  scales[0] = fabs(x - mean[0]);
-  scales[1] = fabs(y - mean[1]);
-  scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
-  scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) +
-              sqr(ccl_get_feature_sse(3) - mean[5]); /* Squared length over passes 1-3. */
-  scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
-  scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) +
-              sqr(ccl_get_feature_sse(7) - mean[9]); /* Squared length over passes 5-7. */
-  if (use_time) {
-    scales[6] = fabs(t - mean[10]);
-  }
-
-  for (int i = 0; i < (use_time ? 7 : 6); i++)
-    scales[i] = mask(active_pixels, scales[i]); /* Presumably clears inactive lanes. */
-}
-
-ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
-{ /* In-place expansion of 6-7 grouped magnitudes into 10-11 per-feature reciprocals. */
-  scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f))); /* Floor of 0.01 before rcp. */
-  scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
-  scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
-  if (use_time) {
-    scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f))); /* Read slot 6 first. */
-  }
-  scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
-  scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
-  scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
-} /* Slots 3-5 are written last because slots 4 and 5 are still inputs until then. */
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_kernel.h b/intern/cycles/kernel/filter/filter_kernel.h
deleted file mode 100644
index 2ef03dc0a02..00000000000
--- a/intern/cycles/kernel/filter/filter_kernel.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/util_color.h"
-#include "util/util_math.h"
-#include "util/util_math_fast.h"
-#include "util/util_texture.h"
-
-#include "util/util_atomic.h"
-#include "util/util_math_matrix.h"
-
-#include "kernel/filter/filter_defines.h"
-
-#include "kernel/filter/filter_features.h"
-#ifdef __KERNEL_SSE3__
-#  include "kernel/filter/filter_features_sse.h"
-#endif
-
-#include "kernel/filter/filter_prefilter.h"
-
-#ifdef __KERNEL_GPU__
-#  include "kernel/filter/filter_transform_gpu.h"
-#else
-#  ifdef __KERNEL_SSE3__
-#    include "kernel/filter/filter_transform_sse.h"
-#  else
-#    include "kernel/filter/filter_transform.h"
-#  endif
-#endif
-
-#include "kernel/filter/filter_reconstruction.h"
-
-#ifdef __KERNEL_CPU__
-#  include "kernel/filter/filter_nlm_cpu.h"
-#else
-#  include "kernel/filter/filter_nlm_gpu.h"
-#endif
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
deleted file mode 100644
index 24200c29203..00000000000
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs)))) /* Direct float4 access; also an lvalue. */
-#define load4_u(buf, ofs) load_float4((buf) + (ofs)) /* Used where the offset may be unaligned. */
-
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, /* Offset from pixel p to q. */
-                                                         int dy,
-                                                         const float *ccl_restrict weight_image,
-                                                         const float *ccl_restrict variance_image,
-                                                         const float *ccl_restrict scale_image,
-                                                         float *difference_image, /* Out. */
-                                                         int4 rect, /* x,y = low; z,w = high. */
-                                                         int stride,
-                                                         int channel_offset, /* > 0: 3 channels. */
-                                                         int frame_offset, /* Added to q only. */
-                                                         float a,
-                                                         float k_2)
-{
-  /* Strides need to be aligned to 16 bytes. */
-  kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
-  int aligned_lowx = rect.x & (~3); /* Round down to a multiple of 4; may start left of rect.x. */
-  const int numChannels = (channel_offset > 0) ? 3 : 1;
-  const float4 channel_fac = make_float4(1.0f / numChannels); /* Averages over the channels. */
-
-  for (int y = rect.y; y < rect.w; y++) {
-    int idx_p = y * stride + aligned_lowx;
-    int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
-    for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
-      float4 diff = make_float4(0.0f);
-      float4 scale_fac;
-      if (scale_image) { /* scale_image is optional; without it q is compared unscaled. */
-        scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
-                          make_float4(0.25f),
-                          make_float4(4.0f));
-      }
-      else {
-        scale_fac = make_float4(1.0f);
-      }
-      for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
-        /* idx_p is guaranteed to be aligned, but idx_q isn't. */
-        float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
-        float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
-        float4 cdiff = color_p - color_q;
-        float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
-        float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
-        diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
-                (make_float4(1e-8f) + k_2 * (var_p + var_q)); /* 1e-8 keeps the divisor nonzero. */
-      }
-      load4_a(difference_image, idx_p) = diff * channel_fac;
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_blur(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{ /* Vertical box average: each output row is the mean of rows y-f..y+f clamped to rect. */
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    const int low = max(rect.y, y - f);
-    const int high = min(rect.w, y + f + 1); /* Exclusive upper row. */
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = make_float4(0.0f);
-    }
-    for (int y1 = low; y1 < high; y1++) {
-      for (int x = aligned_lowx; x < rect.z; x += 4) {
-        load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
-      }
-    }
-    float fac = 1.0f / (high - low); /* Normalize by the number of rows actually summed. */
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) *= fac;
-    }
-  }
-}
-
-ccl_device_inline void nlm_blur_horizontal(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{ /* Horizontal box average over x-f..x+f, done in three passes: clear, accumulate, normalize. */
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = make_float4(0.0f);
-    }
-  }
-
-  for (int dx = -f; dx <= f; dx++) { /* One shifted copy of the input per tap. */
-    aligned_lowx = round_down(rect.x - min(0, dx), 4);
-    int highx = rect.z - max(0, dx);
-    int4 lowx4 = make_int4(rect.x - min(0, dx)); /* Keeps x + dx inside [rect.x, rect.z). */
-    int4 highx4 = make_int4(rect.z - max(0, dx));
-    for (int y = rect.y; y < rect.w; y++) {
-      for (int x = aligned_lowx; x < highx; x += 4) {
-        int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-        int4 active = (x4 >= lowx4) & (x4 < highx4);
-
-        float4 diff = load4_u(difference_image, y * stride + x + dx); /* x + dx is unaligned. */
-        load4_a(out_image, y * stride + x) += mask(active, diff);
-      }
-    }
-  }
-
-  aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
-      float4 low = max(make_float4(rect.x), x4 - make_float4(f));
-      float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
-      load4_a(out_image, y * stride + x) *= rcp(high - low); /* Divide by the clamped tap count. */
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_weight(
-    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{ /* Blurs the differences horizontally, then maps each value d to exp(-max(d, 0)). */
-  nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      load4_a(out_image, y * stride + x) = fast_expf4(
-          -max(load4_a(out_image, y * stride + x), make_float4(0.0f))); /* Argument is <= 0. */
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_update_output(int dx, /* Offset from pixel p to q. */
-                                                       int dy,
-                                                       const float *ccl_restrict difference_image,
-                                                       const float *ccl_restrict image,
-                                                       float *temp_image, /* Scratch: weights. */
-                                                       float *out_image, /* += weight * value. */
-                                                       float *accum_image, /* += weight. */
-                                                       int4 rect,
-                                                       int channel_offset, /* Nonzero: 3 channels. */
-                                                       int stride,
-                                                       int f)
-{ /* Accumulates one (dx, dy) shift: blurred weights into accum_image, weighted q into out. */
-  nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
-
-  int aligned_lowx = round_down(rect.x, 4);
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = aligned_lowx; x < rect.z; x += 4) {
-      int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-      int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z)); /* Lanes inside rect. */
-
-      int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
-
-      float4 weight = load4_a(temp_image, idx_p);
-      load4_a(accum_image, idx_p) += mask(active, weight);
-
-      float4 val = load4_u(image, idx_q);
-      if (channel_offset) {
-        val += load4_u(image, idx_q + channel_offset);
-        val += load4_u(image, idx_q + 2 * channel_offset);
-        val *= 1.0f / 3.0f; /* Average of the three channels. */
-      }
-
-      load4_a(out_image, idx_p) += mask(active, weight * val);
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
-                                                           int dy,
-                                                           int t,
-                                                           const float *ccl_restrict
-                                                               difference_image,
-                                                           const float *ccl_restrict buffer,
-                                                           float *transform,
-                                                           int *rank,
-                                                           float *XtWX,
-                                                           float3 *XtWY,
-                                                           int4 rect,
-                                                           int4 filter_window,
-                                                           int stride,
-                                                           int f,
-                                                           int pass_stride,
-                                                           int frame_offset,
-                                                           bool use_time)
-{
-  int4 clip_area = rect_clip(rect, filter_window);
-  /* NOTE(review): the earlier note here named "fy and fy" coordinates that do not exist in
-   * this function; x and y simply iterate over rect clipped to filter_window. */
-  for (int y = clip_area.y; y < clip_area.w; y++) {
-    for (int x = clip_area.x; x < clip_area.z; x++) {
-      const int low = max(rect.x, x - f);
-      const int high = min(rect.z, x + f + 1);
-      float sum = 0.0f;
-      for (int x1 = low; x1 < high; x1++) {
-        sum += difference_image[y * stride + x1];
-      }
-      float weight = sum * (1.0f / (high - low)); /* Horizontal box average of the input. */
-
-      int storage_ofs = coord_to_local_index(filter_window, x, y); /* Per-pixel storage slot. */
-      float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
-      float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
-      float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
-      int *l_rank = rank + storage_ofs;
-
-      kernel_filter_construct_gramian(x, /* Defined outside this file; arguments passed as-is. */
-                                      y,
-                                      1,
-                                      dx,
-                                      dy,
-                                      t,
-                                      stride,
-                                      pass_stride,
-                                      frame_offset,
-                                      use_time,
-                                      buffer,
-                                      l_transform,
-                                      l_rank,
-                                      weight,
-                                      l_XtWX,
-                                      l_XtWY,
-                                      0);
-    }
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_normalize(float *out_image, /* In/out. */
-                                                   const float *ccl_restrict accum_image,
-                                                   int4 rect, /* x,y = low; z,w = high. */
-                                                   int w) /* Row stride of both images. */
-{ /* Divides every out_image pixel inside rect by the matching accum_image pixel. */
-  for (int y = rect.y; y < rect.w; y++) {
-    for (int x = rect.x; x < rect.z; x++) {
-      out_image[y * w + x] /= accum_image[y * w + x]; /* No guard against a zero divisor. */
-    }
-  }
-}
-
-#undef load4_a
-#undef load4_u
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
deleted file mode 100644
index 650c743f34f..00000000000
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Determines pixel coordinates and offset for the current thread.
- * Returns whether the thread should do any work.
- *
- * All coordinates are relative to the denoising buffer!
- *
- * Window is the rect that should be processed.
- * co is filled with (x, y, dx, dy).
- */
-ccl_device_inline bool get_nlm_coords_window(
-    int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window)
-{
-  /* Determine the pixel offset that this thread should apply. */
-  int s = 2 * r + 1;
-  int si = ccl_global_id(1);
-  int sx = si % s;
-  int sy = si / s;
-  if (sy >= s) {
-    return false;
-  }
-
-  /* Pixels still need to lie inside the denoising buffer after applying the offset,
-   * so determine the area for which this is the case. */
-  int dx = sx - r;
-  int dy = sy - r;
-
-  *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy));
-
-  /* Find the intersection of the area that we want to process (window) and the area
-   * that can be processed (rect) to get the final area for this offset. */
-  int4 clip_area = rect_clip(window, *rect);
-
-  /* If the radius is larger than one of the sides of the window,
-   * there will be shifts for which there is no usable pixel at all. */
-  if (!rect_is_valid(clip_area)) {
-    return false;
-  }
-
-  /* Map the linear thread index to pixels inside the clip area. */
-  int x, y;
-  if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
-    return false;
-  }
-
-  *co = make_int4(x, y, dx, dy);
-
-  *ofs = (sy * s + sx) * stride;
-
-  return true;
-}
-
-ccl_device_inline bool get_nlm_coords(
-    int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs)
-{
-  return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_difference(
-    int x,
-    int y,
-    int dx,
-    int dy,
-    const ccl_global float *ccl_restrict weight_image,
-    const ccl_global float *ccl_restrict variance_image,
-    const ccl_global float *ccl_restrict scale_image,
-    ccl_global float *difference_image,
-    int4 rect,
-    int stride,
-    int channel_offset,
-    int frame_offset,
-    float a,
-    float k_2)
-{
-  int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset;
-  int numChannels = channel_offset ? 3 : 1;
-
-  float diff = 0.0f;
-  float scale_fac = 1.0f;
-  if (scale_image) {
-    scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
-  }
-
-  for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
-    float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q];
-    float pvar = variance_image[idx_p];
-    float qvar = sqr(scale_fac) * variance_image[idx_q];
-    diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar));
-  }
-  if (numChannels > 1) {
-    diff *= 1.0f / numChannels;
-  }
-  difference_image[y * stride + x] = diff;
-}
-
-ccl_device_inline void kernel_filter_nlm_blur(int x,
-                                              int y,
-                                              const ccl_global float *ccl_restrict
-                                                  difference_image,
-                                              ccl_global float *out_image,
-                                              int4 rect,
-                                              int stride,
-                                              int f)
-{
-  float sum = 0.0f;
-  const int low = max(rect.y, y - f);
-  const int high = min(rect.w, y + f + 1);
-  for (int y1 = low; y1 < high; y1++) {
-    sum += difference_image[y1 * stride + x];
-  }
-  sum *= 1.0f / (high - low);
-  out_image[y * stride + x] = sum;
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_weight(int x,
-                                                     int y,
-                                                     const ccl_global float *ccl_restrict
-                                                         difference_image,
-                                                     ccl_global float *out_image,
-                                                     int4 rect,
-                                                     int stride,
-                                                     int f)
-{
-  float sum = 0.0f;
-  const int low = max(rect.x, x - f);
-  const int high = min(rect.z, x + f + 1);
-  for (int x1 = low; x1 < high; x1++) {
-    sum += difference_image[y * stride + x1];
-  }
-  sum *= 1.0f / (high - low);
-  out_image[y * stride + x] = fast_expf(-max(sum, 0.0f));
-}
-
-ccl_device_inline void kernel_filter_nlm_update_output(int x,
-                                                       int y,
-                                                       int dx,
-                                                       int dy,
-                                                       const ccl_global float *ccl_restrict
-                                                           difference_image,
-                                                       const ccl_global float *ccl_restrict image,
-                                                       ccl_global float *out_image,
-                                                       ccl_global float *accum_image,
-                                                       int4 rect,
-                                                       int channel_offset,
-                                                       int stride,
-                                                       int f)
-{
-  float sum = 0.0f;
-  const int low = max(rect.x, x - f);
-  const int high = min(rect.z, x + f + 1);
-  for (int x1 = low; x1 < high; x1++) {
-    sum += difference_image[y * stride + x1];
-  }
-  sum *= 1.0f / (high - low);
-
-  int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
-  if (out_image) {
-    atomic_add_and_fetch_float(accum_image + idx_p, sum);
-
-    float val = image[idx_q];
-    if (channel_offset) {
-      val += image[idx_q + channel_offset];
-      val += image[idx_q + 2 * channel_offset];
-      val *= 1.0f / 3.0f;
-    }
-    atomic_add_and_fetch_float(out_image + idx_p, sum * val);
-  }
-  else {
-    accum_image[idx_p] = sum;
-  }
-}
-
-ccl_device_inline void kernel_filter_nlm_construct_gramian(
-    int x,
-    int y,
-    int dx,
-    int dy,
-    int t,
-    const ccl_global float *ccl_restrict difference_image,
-    const ccl_global float *ccl_restrict buffer,
-    const ccl_global float *ccl_restrict transform,
-    ccl_global int *rank,
-    ccl_global float *XtWX,
-    ccl_global float3 *XtWY,
-    int4 rect,
-    int4 filter_window,
-    int stride,
-    int f,
-    int pass_stride,
-    int frame_offset,
-    bool use_time,
-    int localIdx)
-{
-  const int low = max(rect.x, x - f);
-  const int high = min(rect.z, x + f + 1);
-  float sum = 0.0f;
-  for (int x1 = low; x1 < high; x1++) {
-    sum += difference_image[y * stride + x1];
-  }
-  float weight = sum * (1.0f / (high - low));
-
-  /* Reconstruction data is only stored for pixels inside the filter window,
-   * so compute the pixels's index in there. */
-  int storage_ofs = coord_to_local_index(filter_window, x, y);
-  transform += storage_ofs;
-  rank += storage_ofs;
-  XtWX += storage_ofs;
-  XtWY += storage_ofs;
-
-  kernel_filter_construct_gramian(x,
-                                  y,
-                                  rect_size(filter_window),
-                                  dx,
-                                  dy,
-                                  t,
-                                  stride,
-                                  pass_stride,
-                                  frame_offset,
-                                  use_time,
-                                  buffer,
-                                  transform,
-                                  rank,
-                                  weight,
-                                  XtWX,
-                                  XtWY,
-                                  localIdx);
-}
-
-ccl_device_inline void kernel_filter_nlm_normalize(int x,
-                                                   int y,
-                                                   ccl_global float *out_image,
-                                                   const ccl_global float *ccl_restrict
-                                                       accum_image,
-                                                   int stride)
-{
-  out_image[y * stride + x] /= accum_image[y * stride + x];
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
deleted file mode 100644
index 97cecba190e..00000000000
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/**
- * First step of the shadow prefiltering, performs the shadow division and stores all data
- * in a nice and easy rectangular array that can be passed to the NLM filter.
- *
- * Calculates:
- * \param unfiltered: Contains the two half images of the shadow feature pass
- * \param sampleVariance: The sample-based variance calculated in the kernel.
- * Note: This calculation is biased in general,
- * and especially here since the variance of the ratio can only be approximated.
- * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy
- * (since it's essentially the buffer variance of the two variance halves)
- * \param bufferVariance: The buffer-based variance of the shadow feature.
- * Unbiased, but quite noisy.
- */
-ccl_device void kernel_filter_divide_shadow(int sample,
-                                            CCL_FILTER_TILE_INFO,
-                                            int x,
-                                            int y,
-                                            ccl_global float *unfilteredA,
-                                            ccl_global float *unfilteredB,
-                                            ccl_global float *sampleVariance,
-                                            ccl_global float *sampleVarianceV,
-                                            ccl_global float *bufferVariance,
-                                            int4 rect,
-                                            int buffer_pass_stride,
-                                            int buffer_denoising_offset)
-{
-  int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
-  int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
-  int tile = ytile * 3 + xtile;
-
-  int offset = tile_info->offsets[tile];
-  int stride = tile_info->strides[tile];
-  const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer(
-      tile);
-  center_buffer += (y * stride + x + offset) * buffer_pass_stride;
-  center_buffer += buffer_denoising_offset + 14;
-
-  int buffer_w = align_up(rect.z - rect.x, 4);
-  int idx = (y - rect.y) * buffer_w + (x - rect.x);
-  unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
-  unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
-
-  float varA = center_buffer[2];
-  float varB = center_buffer[5];
-  int odd_sample = (sample + 1) / 2;
-  int even_sample = sample / 2;
-
-  /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
-   * update does not work efficiently with atomics in the kernel. */
-  varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample);
-  varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample);
-
-  varA /= max(odd_sample - 1, 1);
-  varB /= max(even_sample - 1, 1);
-
-  sampleVariance[idx] = 0.5f * (varA + varB) / sample;
-  sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample);
-  bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) *
-                        (unfilteredA[idx] - unfilteredB[idx]);
-}
-
-/* Load a regular feature from the render buffers into the denoise buffer.
- * Parameters:
- * - sample: The sample amount in the buffer, used to normalize the buffer.
- * - m_offset, v_offset: Render Buffer Pass offsets of mean and variance of the feature.
- * - x, y: Current pixel
- * - mean, variance: Target denoise buffers.
- * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
- */
-ccl_device void kernel_filter_get_feature(int sample,
-                                          CCL_FILTER_TILE_INFO,
-                                          int m_offset,
-                                          int v_offset,
-                                          int x,
-                                          int y,
-                                          ccl_global float *mean,
-                                          ccl_global float *variance,
-                                          float scale,
-                                          int4 rect,
-                                          int buffer_pass_stride,
-                                          int buffer_denoising_offset)
-{
-  int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
-  int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
-  int tile = ytile * 3 + xtile;
-  ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) +
-                                    (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) *
-                                        buffer_pass_stride +
-                                    buffer_denoising_offset;
-
-  int buffer_w = align_up(rect.z - rect.x, 4);
-  int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
-  float val = scale * center_buffer[m_offset];
-  mean[idx] = val;
-
-  if (v_offset >= 0) {
-    if (sample > 1) {
-      /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
-       * update does not work efficiently with atomics in the kernel. */
-      variance[idx] = max(
-          0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1)));
-    }
-    else {
-      /* Can't compute variance with single sample, just set it very high. */
-      variance[idx] = 1e10f;
-    }
-  }
-}
-
-ccl_device void kernel_filter_write_feature(int sample,
-                                            int x,
-                                            int y,
-                                            int4 buffer_params,
-                                            ccl_global float *from,
-                                            ccl_global float *buffer,
-                                            int out_offset,
-                                            int4 rect)
-{
-  ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
-                                                   buffer_params.z;
-
-  int buffer_w = align_up(rect.z - rect.x, 4);
-  int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
-  combined_buffer[out_offset] = from[idx];
-}
-
-#define GET_COLOR(image) \
-  make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride])
-#define SET_COLOR(image, color) \
-  image[idx] = color.x; \
-  image[idx + pass_stride] = color.y; \
-  image[idx + 2 * pass_stride] = color.z
-
-ccl_device void kernel_filter_detect_outliers(int x,
-                                              int y,
-                                              ccl_global float *in,
-                                              ccl_global float *variance_out,
-                                              ccl_global float *depth,
-                                              ccl_global float *image_out,
-                                              int4 rect,
-                                              int pass_stride)
-{
-  int buffer_w = align_up(rect.z - rect.x, 4);
-
-  ccl_global float *image_in = in;
-  ccl_global float *variance_in = in + 3 * pass_stride;
-
-  int n = 0;
-  float values[25];
-  float pixel_variance, max_variance = 0.0f;
-  for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
-    for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
-      int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
-      float3 color = GET_COLOR(image_in);
-      color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-      float L = average(color);
-
-      /* Find the position of L. */
-      int i;
-      for (i = 0; i < n; i++) {
-        if (values[i] > L)
-          break;
-      }
-      /* Make space for L by shifting all following values to the right. */
-      for (int j = n; j > i; j--) {
-        values[j] = values[j - 1];
-      }
-      /* Insert L. */
-      values[i] = L;
-      n++;
-
-      float3 pixel_var = GET_COLOR(variance_in);
-      float var = average(pixel_var);
-      if ((x1 == x) && (y1 == y)) {
-        pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
-                                                                                            var;
-      }
-      else {
-        max_variance = max(max_variance, var);
-      }
-    }
-  }
-
-  max_variance += 1e-4f;
-
-  int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
-  float3 color = GET_COLOR(image_in);
-  float3 variance = GET_COLOR(variance_in);
-  color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-  variance = max(variance, make_float3(0.0f, 0.0f, 0.0f));
-
-  float L = average(color);
-
-  float ref = 2.0f * values[(int)(n * 0.75f)];
-
-  /* Slightly offset values to avoid false positives in (almost) black areas. */
-  max_variance += 1e-5f;
-  ref -= 1e-5f;
-
-  if (L > ref) {
-    /* The pixel appears to be an outlier.
-     * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is
-     * that the pixel should actually be at the reference value: If the reference is within the
-     * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very
-     * unlikely that the pixel should be darker, which indicates a legitimate highlight.
-     */
-
-    if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
-      depth[idx] = -depth[idx];
-      color *= ref / L;
-      variance = make_float3(max_variance, max_variance, max_variance);
-    }
-    else {
-      float stddev = sqrtf(pixel_variance);
-      if (L - 3 * stddev < ref) {
-        /* The pixel is an outlier, so negate the depth value to mark it as one.
-         * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM
-         * weights. */
-        depth[idx] = -depth[idx];
-        float fac = ref / L;
-        color *= fac;
-        variance *= sqr(fac);
-      }
-    }
-  }
-
-  /* Apply log(1+x) transform to compress highlights and avoid halos in the denoised results.
-   * Variance is transformed accordingly - the derivative of the transform is 1/(1+x), so we
-   * scale by the square of that (since we have variance instead of standard deviation). */
-  color = color_highlight_compress(color, &variance);
-
-  SET_COLOR(image_out, color);
-  SET_COLOR(variance_out, variance);
-}
-
-#undef GET_COLOR
-#undef SET_COLOR
-
-/* Combine A/B buffers.
- * Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x,
-                                             int y,
-                                             ccl_global float *mean,
-                                             ccl_global float *variance,
-                                             ccl_global float *a,
-                                             ccl_global float *b,
-                                             int4 rect,
-                                             int r)
-{
-  int buffer_w = align_up(rect.z - rect.x, 4);
-  int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
-  if (mean)
-    mean[idx] = 0.5f * (a[idx] + b[idx]);
-  if (variance) {
-    if (r == 0)
-      variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]);
-    else {
-      variance[idx] = 0.0f;
-      float values[25];
-      int numValues = 0;
-      for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) {
-        for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) {
-          int pidx = (py - rect.y) * buffer_w + (px - rect.x);
-          values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]);
-        }
-      }
-      /* Insertion-sort the variances (fast enough for 25 elements). */
-      for (int i = 1; i < numValues; i++) {
-        float v = values[i];
-        int j;
-        for (j = i - 1; j >= 0 && values[j] > v; j--)
-          values[j + 1] = values[j];
-        values[j + 1] = v;
-      }
-      variance[idx] = values[(7 * numValues) / 8];
-    }
-  }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
deleted file mode 100644
index 17941689ad5..00000000000
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device_inline void kernel_filter_construct_gramian(int x,
-                                                       int y,
-                                                       int storage_stride,
-                                                       int dx,
-                                                       int dy,
-                                                       int t,
-                                                       int buffer_stride,
-                                                       int pass_stride,
-                                                       int frame_offset,
-                                                       bool use_time,
-                                                       const ccl_global float *ccl_restrict buffer,
-                                                       const ccl_global float *ccl_restrict
-                                                           transform,
-                                                       ccl_global int *rank,
-                                                       float weight,
-                                                       ccl_global float *XtWX,
-                                                       ccl_global float3 *XtWY,
-                                                       int localIdx)
-{
-  if (weight < 1e-3f) {
-    return;
-  }
-
-  int p_offset = y * buffer_stride + x;
-  int q_offset = (y + dy) * buffer_stride + (x + dx) + frame_offset;
-
-#ifdef __KERNEL_GPU__
-  const int stride = storage_stride;
-#else
-  const int stride = 1;
-  (void)storage_stride;
-#endif
-
-#ifdef __KERNEL_CUDA__
-  ccl_local float shared_design_row[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE];
-  ccl_local_param float *design_row = shared_design_row + localIdx * (DENOISE_FEATURES + 1);
-#else
-  float design_row[DENOISE_FEATURES + 1];
-#endif
-
-  float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
-
-  /* If the pixel was flagged as an outlier during prefiltering, skip it. */
-  if (ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
-    return;
-  }
-
-  filter_get_design_row_transform(make_int3(x, y, t),
-                                  buffer + p_offset,
-                                  make_int3(x + dx, y + dy, t),
-                                  buffer + q_offset,
-                                  pass_stride,
-                                  *rank,
-                                  design_row,
-                                  transform,
-                                  stride,
-                                  use_time);
-
-#ifdef __KERNEL_GPU__
-  math_trimatrix_add_gramian_strided(XtWX, (*rank) + 1, design_row, weight, stride);
-  math_vec3_add_strided(XtWY, (*rank) + 1, design_row, weight * q_color, stride);
-#else
-  math_trimatrix_add_gramian(XtWX, (*rank) + 1, design_row, weight);
-  math_vec3_add(XtWY, (*rank) + 1, design_row, weight * q_color);
-#endif
-}
-
-ccl_device_inline void kernel_filter_finalize(int x,
-                                              int y,
-                                              ccl_global float *buffer,
-                                              ccl_global int *rank,
-                                              int storage_stride,
-                                              ccl_global float *XtWX,
-                                              ccl_global float3 *XtWY,
-                                              int4 buffer_params,
-                                              int sample)
-{
-#ifdef __KERNEL_GPU__
-  const int stride = storage_stride;
-#else
-  const int stride = 1;
-  (void)storage_stride;
-#endif
-
-  if (XtWX[0] < 1e-3f) {
-    /* There is not enough information to determine a denoised result.
-     * As a fallback, keep the original value of the pixel. */
-    return;
-  }
-
-  /* The weighted average of pixel colors (essentially, the NLM-filtered image).
-   * In case the solution of the linear model fails due to numerical issues or
-   * returns nonsensical negative values, fall back to this value. */
-  float3 mean_color = XtWY[0] / XtWX[0];
-
-  math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);
-
-  float3 final_color = XtWY[0];
-  if (!isfinite3_safe(final_color) ||
-      (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) {
-    final_color = mean_color;
-  }
-
-  /* Clamp pixel value to positive values and reverse the highlight compression transform. */
-  final_color = color_highlight_uncompress(max(final_color, make_float3(0.0f, 0.0f, 0.0f)));
-
-  ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
-                                                   buffer_params.z;
-  if (buffer_params.w >= 0) {
-    final_color *= sample;
-    if (buffer_params.w > 0) {
-      final_color.x += combined_buffer[buffer_params.w + 0];
-      final_color.y += combined_buffer[buffer_params.w + 1];
-      final_color.z += combined_buffer[buffer_params.w + 2];
-    }
-  }
-  combined_buffer[0] = final_color.x;
-  combined_buffer[1] = final_color.y;
-  combined_buffer[2] = final_color.z;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
deleted file mode 100644
index 880a661214e..00000000000
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
-                                                  CCL_FILTER_TILE_INFO,
-                                                  int x,
-                                                  int y,
-                                                  int4 rect,
-                                                  int pass_stride,
-                                                  int frame_stride,
-                                                  bool use_time,
-                                                  float *transform,
-                                                  int *rank,
-                                                  int radius,
-                                                  float pca_threshold)
-{
-  int buffer_w = align_up(rect.z - rect.x, 4);
-
-  float features[DENOISE_FEATURES];
-
-  const float *ccl_restrict pixel_buffer;
-  int3 pixel;
-
-  int num_features = use_time ? 11 : 10;
-
-  /* === Calculate denoising window. === */
-  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
-  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
-  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
-  /* === Shift feature passes to have mean 0. === */
-  float feature_means[DENOISE_FEATURES];
-  math_vector_zero(feature_means, num_features);
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
-    math_vector_add(feature_means, features, num_features);
-  }
-  END_FOR_PIXEL_WINDOW
-
-  math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
-  /* === Scale the shifted feature passes to a range of [-1; 1] ===
-   * Will be baked into the transform later. */
-  float feature_scale[DENOISE_FEATURES];
-  math_vector_zero(feature_scale, num_features);
-
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-    math_vector_max(feature_scale, features, num_features);
-  }
-  END_FOR_PIXEL_WINDOW
-
-  filter_calculate_scale(feature_scale, use_time);
-
-  /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimensional feature space to a reduced feature
-   * (r-feature) space which generally has fewer dimensions.
-   * This mainly helps to prevent over-fitting. */
-  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
-  math_matrix_zero(feature_matrix, num_features);
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-    math_vector_mul(features, feature_scale, num_features);
-    math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
-  }
-  END_FOR_PIXEL_WINDOW
-
-  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
-  *rank = 0;
-  /* Prevent over-fitting when a small window is used. */
-  int max_rank = min(num_features, num_pixels / 3);
-  if (pca_threshold < 0.0f) {
-    float threshold_energy = 0.0f;
-    for (int i = 0; i < num_features; i++) {
-      threshold_energy += feature_matrix[i * num_features + i];
-    }
-    threshold_energy *= 1.0f - (-pca_threshold);
-
-    float reduced_energy = 0.0f;
-    for (int i = 0; i < max_rank; i++, (*rank)++) {
-      if (i >= 2 && reduced_energy >= threshold_energy)
-        break;
-      float s = feature_matrix[i * num_features + i];
-      reduced_energy += s;
-    }
-  }
-  else {
-    for (int i = 0; i < max_rank; i++, (*rank)++) {
-      float s = feature_matrix[i * num_features + i];
-      if (i >= 2 && sqrtf(s) < pca_threshold)
-        break;
-    }
-  }
-
-  /* Bake the feature scaling into the transformation matrix. */
-  for (int i = 0; i < (*rank); i++) {
-    math_vector_mul(transform + i * num_features, feature_scale, num_features);
-  }
-  math_matrix_transpose(transform, num_features, 1);
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h
deleted file mode 100644
index ec258a5212a..00000000000
--- a/intern/cycles/kernel/filter/filter_transform_gpu.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
-                                                  CCL_FILTER_TILE_INFO,
-                                                  int x, /* Window center, x coordinate. */
-                                                  int y, /* Window center, y coordinate. */
-                                                  int4 rect, /* Bounds the window: x/y are the lower limits, z/w the upper limits. */
-                                                  int pass_stride,
-                                                  int frame_stride, /* Not read by name in this body; presumably consumed by FOR_PIXEL_WINDOW - confirm. */
-                                                  bool use_time, /* Selects 11 features instead of 10. */
-                                                  ccl_global float *transform, /* Output matrix, addressed with transform_stride. */
-                                                  ccl_global int *rank, /* Output: number of components kept. */
-                                                  int radius, /* Window half-size around (x, y), before clamping to rect. */
-                                                  float pca_threshold, /* < 0: relative-energy cutoff; otherwise an absolute cutoff. */
-                                                  int transform_stride, /* Element stride used for every access to transform. */
-                                                  int localIdx) /* Only read in the __KERNEL_CUDA__ build. */
-{ /* Builds the reduced feature-space transform and its rank for the window around (x, y). */
-  int buffer_w = align_up(rect.z - rect.x, 4); /* Not read by name below; presumably consumed by FOR_PIXEL_WINDOW - confirm. */
-
-#ifdef __KERNEL_CUDA__
-  ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE];
-  ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES; /* Each localIdx gets its own DENOISE_FEATURES-float slice. */
-#else
-  float features[DENOISE_FEATURES]; /* Other builds use a plain local array. */
-#endif
-
-  int num_features = use_time ? 11 : 10; /* One additional feature when use_time is set. */
-
-  /* === Calculate denoising window. === */
-  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius)); /* Lower corner, clamped to rect. */
-  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); /* Upper corner (one past the last pixel), clamped to rect. */
-  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; /* Window area times the number of frames. */
-  const ccl_global float *ccl_restrict pixel_buffer; /* Never assigned here; presumably advanced by FOR_PIXEL_WINDOW - confirm. */
-  int3 pixel; /* Never assigned here; presumably set by FOR_PIXEL_WINDOW - confirm. */
-
-  /* === Shift feature passes to have mean 0. === */
-  float feature_means[DENOISE_FEATURES];
-  math_vector_zero(feature_means, num_features);
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride); /* NULL in the means slot: the means are still being computed. */
-    math_vector_add(feature_means, features, num_features); /* Accumulate the per-feature sum. */
-  }
-  END_FOR_PIXEL_WINDOW
-
-  math_vector_scale(feature_means, 1.0f / num_pixels, num_features); /* Turn the sums into means. */
-
-  /* === Scale the shifted feature passes to a range of [-1; 1] ===
-   * Will be baked into the transform later. */
-  float feature_scale[DENOISE_FEATURES];
-  math_vector_zero(feature_scale, num_features);
-
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-    math_vector_max(feature_scale, features, num_features); /* NOTE(review): presumably keeps a per-feature running maximum - confirm. */
-  }
-  END_FOR_PIXEL_WINDOW
-
-  filter_calculate_scale(feature_scale, use_time); /* Presumably converts the collected extents into the final scale factors in place - confirm. */
-
-  /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimensional feature space to a reduced feature
-   * (r-feature) space which generally has fewer dimensions.
-   * This mainly helps to prevent over-fitting. */
-  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
-  math_matrix_zero(feature_matrix, num_features);
-  FOR_PIXEL_WINDOW
-  {
-    filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); /* This pass hands in the means computed above. */
-    math_vector_mul(features, feature_scale, num_features); /* Apply the per-feature scale before accumulating. */
-    math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f); /* Every pixel contributes with weight 1.0f. */
-  }
-  END_FOR_PIXEL_WINDOW
-
-  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride); /* The diagonal of feature_matrix is read below as per-component energy. */
-  *rank = 0;
-  /* Prevent over-fitting when a small window is used. */
-  int max_rank = min(num_features, num_pixels / 3);
-  if (pca_threshold < 0.0f) { /* Relative mode: keep components until enough of the total energy is covered. */
-    float threshold_energy = 0.0f;
-    for (int i = 0; i < num_features; i++) {
-      threshold_energy += feature_matrix[i * num_features + i]; /* Sum of all diagonal entries. */
-    }
-    threshold_energy *= 1.0f - (-pca_threshold); /* Same as 1 + pca_threshold, i.e. a (1 - |pca_threshold|) share of the total. */
-
-    float reduced_energy = 0.0f;
-    for (int i = 0; i < max_rank; i++, (*rank)++) { /* *rank counts the components kept so far. */
-      if (i >= 2 && reduced_energy >= threshold_energy) /* The first two components are never rejected by the threshold. */
-        break;
-      float s = feature_matrix[i * num_features + i];
-      reduced_energy += s;
-    }
-  }
-  else { /* Absolute mode: compare each component against pca_threshold directly. */
-    for (int i = 0; i < max_rank; i++, (*rank)++) {
-      float s = feature_matrix[i * num_features + i];
-      if (i >= 2 && sqrtf(s) < pca_threshold) /* Stop at the first component past the second whose square root is below the threshold. */
-        break;
-    }
-  }
-
-  math_matrix_transpose(transform, num_features, transform_stride);
-
-  /* Bake the feature scaling into the transformation matrix. */
-  for (int i = 0; i < num_features; i++) {
-    for (int j = 0; j < (*rank); j++) {
-      transform[(i * num_features + j) * transform_stride] *= feature_scale[i]; /* Entry (i, j) with j < *rank is scaled by feature_scale[i]. */
-    }
-  }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h
deleted file mode 100644
index 0304d990f9f..00000000000
--- a/intern/cycles/kernel/filter/filter_transform_sse.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
-                                                  CCL_FILTER_TILE_INFO,
-                                                  int x, /* Window center, x coordinate. */
-                                                  int y, /* Window center, y coordinate. */
-                                                  int4 rect, /* Bounds the window: x/y are the lower limits, z/w the upper limits. */
-                                                  int pass_stride,
-                                                  int frame_stride, /* Not read by name in this body; presumably consumed by FOR_PIXEL_WINDOW_SSE - confirm. */
-                                                  bool use_time, /* Selects 11 features instead of 10. */
-                                                  float *transform, /* Output matrix, addressed as num_features-wide rows. */
-                                                  int *rank, /* Output: number of components kept. */
-                                                  int radius, /* Window half-size around (x, y), before clamping to rect. */
-                                                  float pca_threshold) /* < 0: relative-energy cutoff; otherwise an absolute cutoff. */
-{ /* float4 variant: builds the reduced feature-space transform and its rank for the window around (x, y). */
-  int buffer_w = align_up(rect.z - rect.x, 4); /* Not read by name below; presumably consumed by FOR_PIXEL_WINDOW_SSE - confirm. */
-
-  float4 features[DENOISE_FEATURES]; /* One float4 per feature; presumably one pixel per lane - confirm. */
-  const float *ccl_restrict pixel_buffer; /* Never assigned here; presumably advanced by FOR_PIXEL_WINDOW_SSE - confirm. */
-  int3 pixel; /* Never referenced by name below; presumably used by FOR_PIXEL_WINDOW_SSE - confirm. */
-
-  int num_features = use_time ? 11 : 10; /* One additional feature when use_time is set. */
-
-  /* === Calculate denoising window. === */
-  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius)); /* Lower corner, clamped to rect. */
-  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); /* Upper corner (one past the last pixel), clamped to rect. */
-  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; /* Window area times the number of frames. */
-
-  /* === Shift feature passes to have mean 0. === */
-  float4 feature_means[DENOISE_FEATURES];
-  math_vector_zero_sse(feature_means, num_features);
-  FOR_PIXEL_WINDOW_SSE
-  {
-    filter_get_features_sse(
-        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride); /* x4, y4, t4 and active_pixels are not declared here; presumably provided by FOR_PIXEL_WINDOW_SSE - confirm. */
-    math_vector_add_sse(feature_means, num_features, features); /* Accumulate the per-feature sums. */
-  }
-  END_FOR_PIXEL_WINDOW_SSE
-
-  float4 pixel_scale = make_float4(1.0f / num_pixels);
-  for (int i = 0; i < num_features; i++) {
-    feature_means[i] = reduce_add(feature_means[i]) * pixel_scale; /* Presumably sums the lanes, then divides by num_pixels to get the mean - confirm reduce_add. */
-  }
-
-  /* === Scale the shifted feature passes to a range of [-1; 1] ===
-   * Will be baked into the transform later. */
-  float4 feature_scale[DENOISE_FEATURES];
-  math_vector_zero_sse(feature_scale, num_features);
-  FOR_PIXEL_WINDOW_SSE
-  {
-    filter_get_feature_scales_sse(
-        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
-    math_vector_max_sse(feature_scale, features, num_features); /* NOTE(review): presumably keeps a per-feature running maximum - confirm. */
-  }
-  END_FOR_PIXEL_WINDOW_SSE
-
-  filter_calculate_scale_sse(feature_scale, use_time); /* Presumably converts the collected extents into the final scale factors in place - confirm. */
-
-  /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimensional feature space to a reduced feature
-   * (r-feature) space which generally has fewer dimensions.
-   * This mainly helps to prevent over-fitting. */
-  float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
-  math_matrix_zero_sse(feature_matrix_sse, num_features);
-  FOR_PIXEL_WINDOW_SSE
-  {
-    filter_get_features_sse(
-        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride); /* This pass hands in the means computed above. */
-    math_vector_mul_sse(features, num_features, feature_scale); /* Apply the per-feature scale before accumulating. */
-    math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f)); /* Every lane contributes with weight 1.0f. */
-  }
-  END_FOR_PIXEL_WINDOW_SSE
-
-  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
-  math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse); /* Presumably collapses the float4 lanes into one scalar matrix - confirm. */
-
-  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); /* The diagonal of feature_matrix is read below as per-component energy. */
-
-  *rank = 0;
-  /* Prevent over-fitting when a small window is used. */
-  int max_rank = min(num_features, num_pixels / 3);
-  if (pca_threshold < 0.0f) { /* Relative mode: keep components until enough of the total energy is covered. */
-    float threshold_energy = 0.0f;
-    for (int i = 0; i < num_features; i++) {
-      threshold_energy += feature_matrix[i * num_features + i]; /* Sum of all diagonal entries. */
-    }
-    threshold_energy *= 1.0f - (-pca_threshold); /* Same as 1 + pca_threshold, i.e. a (1 - |pca_threshold|) share of the total. */
-
-    float reduced_energy = 0.0f;
-    for (int i = 0; i < max_rank; i++, (*rank)++) { /* *rank counts the components kept so far. */
-      if (i >= 2 && reduced_energy >= threshold_energy) /* The first two components are never rejected by the threshold. */
-        break;
-      float s = feature_matrix[i * num_features + i];
-      reduced_energy += s;
-    }
-  }
-  else { /* Absolute mode: compare each component against pca_threshold directly. */
-    for (int i = 0; i < max_rank; i++, (*rank)++) {
-      float s = feature_matrix[i * num_features + i];
-      if (i >= 2 && sqrtf(s) < pca_threshold) /* Stop at the first component past the second whose square root is below the threshold. */
-        break;
-    }
-  }
-
-  math_matrix_transpose(transform, num_features, 1);
-
-  /* Bake the feature scaling into the transformation matrix. */
-  for (int i = 0; i < num_features; i++) {
-    math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank); /* Scales the first *rank entries of row i by lane 0 of feature_scale[i]. */
-  }
-}
-
-CCL_NAMESPACE_END