Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/filter')
-rw-r--r--intern/cycles/kernel/filter/filter.h52
-rw-r--r--intern/cycles/kernel/filter/filter_defines.h72
-rw-r--r--intern/cycles/kernel/filter/filter_features.h156
-rw-r--r--intern/cycles/kernel/filter/filter_features_sse.h118
-rw-r--r--intern/cycles/kernel/filter/filter_kernel.h50
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h254
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_gpu.h255
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h303
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h140
-rw-r--r--intern/cycles/kernel/filter/filter_transform.h120
-rw-r--r--intern/cycles/kernel/filter/filter_transform_gpu.h129
-rw-r--r--intern/cycles/kernel/filter/filter_transform_sse.h129
12 files changed, 0 insertions, 1778 deletions
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
deleted file mode 100644
index b067e53a8bf..00000000000
--- a/intern/cycles/kernel/filter/filter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FILTER_H__
-#define __FILTER_H__
-
-/* CPU Filter Kernel Interface */
-
-#include "util/util_types.h"
-
-#include "kernel/filter/filter_defines.h"
-
-CCL_NAMESPACE_BEGIN
-
-#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z
-#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
-#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
-
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/filter_cpu.h"
-
-CCL_NAMESPACE_END
-
-#endif /* __FILTER_H__ */
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
deleted file mode 100644
index 1c0ac5e2cb7..00000000000
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FILTER_DEFINES_H__
-#define __FILTER_DEFINES_H__
-
-#define DENOISE_FEATURES 11
-#define TRANSFORM_SIZE (DENOISE_FEATURES * DENOISE_FEATURES)
-#define XTWX_SIZE (((DENOISE_FEATURES + 1) * (DENOISE_FEATURES + 2)) / 2)
-#define XTWY_SIZE (DENOISE_FEATURES + 1)
-
-#define DENOISE_MAX_FRAMES 16
-
-typedef struct TileInfo {
- int offsets[9];
- int strides[9];
- int x[4];
- int y[4];
- int from_render;
- int frames[DENOISE_MAX_FRAMES];
- int num_frames;
- /* TODO(lukas): CUDA doesn't have uint64_t... */
-#ifdef __KERNEL_OPENCL__
- ccl_global float *buffers[9];
-#else
- long long int buffers[9];
-#endif
-} TileInfo;
-
-#ifdef __KERNEL_OPENCL__
-# define CCL_FILTER_TILE_INFO \
- ccl_global TileInfo *tile_info, ccl_global float *tile_buffer_1, \
- ccl_global float *tile_buffer_2, ccl_global float *tile_buffer_3, \
- ccl_global float *tile_buffer_4, ccl_global float *tile_buffer_5, \
- ccl_global float *tile_buffer_6, ccl_global float *tile_buffer_7, \
- ccl_global float *tile_buffer_8, ccl_global float *tile_buffer_9
-# define CCL_FILTER_TILE_INFO_ARG \
- tile_info, tile_buffer_1, tile_buffer_2, tile_buffer_3, tile_buffer_4, tile_buffer_5, \
- tile_buffer_6, tile_buffer_7, tile_buffer_8, tile_buffer_9
-# define ccl_get_tile_buffer(id) \
- (id == 0 ? tile_buffer_1 : \
- id == 1 ? tile_buffer_2 : \
- id == 2 ? tile_buffer_3 : \
- id == 3 ? tile_buffer_4 : \
- id == 4 ? tile_buffer_5 : \
- id == 5 ? tile_buffer_6 : \
- id == 6 ? tile_buffer_7 : \
- id == 7 ? tile_buffer_8 : \
- tile_buffer_9)
-#else
-# ifdef __KERNEL_CUDA__
-# define CCL_FILTER_TILE_INFO ccl_global TileInfo *tile_info
-# else
-# define CCL_FILTER_TILE_INFO TileInfo *tile_info
-# endif
-# define ccl_get_tile_buffer(id) (tile_info->buffers[id])
-#endif
-
-#endif /* __FILTER_DEFINES_H__*/
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
deleted file mode 100644
index 8a2af957146..00000000000
--- a/intern/cycles/kernel/filter/filter_features.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
-
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y). pixel_buffer always
- * points to the current pixel in the first pass. Repeat the loop for every secondary frame if
- * there are any. */
-#define FOR_PIXEL_WINDOW \
- for (int frame = 0; frame < tile_info->num_frames; frame++) { \
- pixel.z = tile_info->frames[frame]; \
- pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
- frame * frame_stride; \
- for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
- for (pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
-
-#define END_FOR_PIXEL_WINDOW \
- } \
- pixel_buffer += buffer_w - (high.x - low.x); \
- } \
- }
-
-ccl_device_inline void filter_get_features(int3 pixel,
- const ccl_global float *ccl_restrict buffer,
- float *features,
- bool use_time,
- const float *ccl_restrict mean,
- int pass_stride)
-{
- features[0] = pixel.x;
- features[1] = pixel.y;
- features[2] = fabsf(ccl_get_feature(buffer, 0));
- features[3] = ccl_get_feature(buffer, 1);
- features[4] = ccl_get_feature(buffer, 2);
- features[5] = ccl_get_feature(buffer, 3);
- features[6] = ccl_get_feature(buffer, 4);
- features[7] = ccl_get_feature(buffer, 5);
- features[8] = ccl_get_feature(buffer, 6);
- features[9] = ccl_get_feature(buffer, 7);
- if (use_time) {
- features[10] = pixel.z;
- }
- if (mean) {
- for (int i = 0; i < (use_time ? 11 : 10); i++) {
- features[i] -= mean[i];
- }
- }
-}
-
-ccl_device_inline void filter_get_feature_scales(int3 pixel,
- const ccl_global float *ccl_restrict buffer,
- float *scales,
- bool use_time,
- const float *ccl_restrict mean,
- int pass_stride)
-{
- scales[0] = fabsf(pixel.x - mean[0]);
- scales[1] = fabsf(pixel.y - mean[1]);
- scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
- scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
- ccl_get_feature(buffer, 2) - mean[4],
- ccl_get_feature(buffer, 3) - mean[5]));
- scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
- scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
- ccl_get_feature(buffer, 6) - mean[8],
- ccl_get_feature(buffer, 7) - mean[9]));
- if (use_time) {
- scales[6] = fabsf(pixel.z - mean[10]);
- }
-}
-
-ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
-{
- scale[0] = 1.0f / max(scale[0], 0.01f);
- scale[1] = 1.0f / max(scale[1], 0.01f);
- scale[2] = 1.0f / max(scale[2], 0.01f);
- if (use_time) {
- scale[10] = 1.0f / max(scale[6], 0.01f);
- }
- scale[6] = 1.0f / max(scale[4], 0.01f);
- scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f);
- scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f);
-}
-
-ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
- int pass_stride)
-{
- return make_float3(
- ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
-}
-
-ccl_device_inline void design_row_add(float *design_row,
- int rank,
- const ccl_global float *ccl_restrict transform,
- int stride,
- int row,
- float feature,
- int transform_row_stride)
-{
- for (int i = 0; i < rank; i++) {
- design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature;
- }
-}
-
-/* Fill the design row. */
-ccl_device_inline void filter_get_design_row_transform(
- int3 p_pixel,
- const ccl_global float *ccl_restrict p_buffer,
- int3 q_pixel,
- const ccl_global float *ccl_restrict q_buffer,
- int pass_stride,
- int rank,
- float *design_row,
- const ccl_global float *ccl_restrict transform,
- int stride,
- bool use_time)
-{
- int num_features = use_time ? 11 : 10;
-
- design_row[0] = 1.0f;
- math_vector_zero(design_row + 1, rank);
-
-#define DESIGN_ROW_ADD(I, F) \
- design_row_add(design_row, rank, transform, stride, I, F, num_features);
- DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
- DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
- DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
- DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
- DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
- DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
- DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
- DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
- DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
- DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
- if (use_time) {
- DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
- }
-#undef DESIGN_ROW_ADD
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
deleted file mode 100644
index 59d4ace2bef..00000000000
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define ccl_get_feature_sse(pass) load_float4(buffer + (pass)*pass_stride)
-
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
- * pixel_buffer always points to the first of the 4 current pixels in the first pass.
- * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set
- * for all pixels within the window. Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW_SSE \
- for (int frame = 0; frame < tile_info->num_frames; frame++) { \
- pixel.z = tile_info->frames[frame]; \
- pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
- frame * frame_stride; \
- float4 t4 = make_float4(pixel.z); \
- for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
- float4 y4 = make_float4(pixel.y); \
- for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
- float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
- int4 active_pixels = x4 < make_float4(high.x);
-
-#define END_FOR_PIXEL_WINDOW_SSE \
- } \
- pixel_buffer += buffer_w - (high.x - low.x); \
- } \
- }
-
-ccl_device_inline void filter_get_features_sse(float4 x,
- float4 y,
- float4 t,
- int4 active_pixels,
- const float *ccl_restrict buffer,
- float4 *features,
- bool use_time,
- const float4 *ccl_restrict mean,
- int pass_stride)
-{
- int num_features = use_time ? 11 : 10;
-
- features[0] = x;
- features[1] = y;
- features[2] = fabs(ccl_get_feature_sse(0));
- features[3] = ccl_get_feature_sse(1);
- features[4] = ccl_get_feature_sse(2);
- features[5] = ccl_get_feature_sse(3);
- features[6] = ccl_get_feature_sse(4);
- features[7] = ccl_get_feature_sse(5);
- features[8] = ccl_get_feature_sse(6);
- features[9] = ccl_get_feature_sse(7);
- if (use_time) {
- features[10] = t;
- }
-
- if (mean) {
- for (int i = 0; i < num_features; i++) {
- features[i] = features[i] - mean[i];
- }
- }
- for (int i = 0; i < num_features; i++) {
- features[i] = mask(active_pixels, features[i]);
- }
-}
-
-ccl_device_inline void filter_get_feature_scales_sse(float4 x,
- float4 y,
- float4 t,
- int4 active_pixels,
- const float *ccl_restrict buffer,
- float4 *scales,
- bool use_time,
- const float4 *ccl_restrict mean,
- int pass_stride)
-{
- scales[0] = fabs(x - mean[0]);
- scales[1] = fabs(y - mean[1]);
- scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
- scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) +
- sqr(ccl_get_feature_sse(3) - mean[5]);
- scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
- scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) +
- sqr(ccl_get_feature_sse(7) - mean[9]);
- if (use_time) {
- scales[6] = fabs(t - mean[10]);
- }
-
- for (int i = 0; i < (use_time ? 7 : 6); i++)
- scales[i] = mask(active_pixels, scales[i]);
-}
-
-ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
-{
- scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
- scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
- scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
- if (use_time) {
- scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
- }
- scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
- scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
- scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_kernel.h b/intern/cycles/kernel/filter/filter_kernel.h
deleted file mode 100644
index 2ef03dc0a02..00000000000
--- a/intern/cycles/kernel/filter/filter_kernel.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/util_color.h"
-#include "util/util_math.h"
-#include "util/util_math_fast.h"
-#include "util/util_texture.h"
-
-#include "util/util_atomic.h"
-#include "util/util_math_matrix.h"
-
-#include "kernel/filter/filter_defines.h"
-
-#include "kernel/filter/filter_features.h"
-#ifdef __KERNEL_SSE3__
-# include "kernel/filter/filter_features_sse.h"
-#endif
-
-#include "kernel/filter/filter_prefilter.h"
-
-#ifdef __KERNEL_GPU__
-# include "kernel/filter/filter_transform_gpu.h"
-#else
-# ifdef __KERNEL_SSE3__
-# include "kernel/filter/filter_transform_sse.h"
-# else
-# include "kernel/filter/filter_transform.h"
-# endif
-#endif
-
-#include "kernel/filter/filter_reconstruction.h"
-
-#ifdef __KERNEL_CPU__
-# include "kernel/filter/filter_nlm_cpu.h"
-#else
-# include "kernel/filter/filter_nlm_gpu.h"
-#endif
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
deleted file mode 100644
index 24200c29203..00000000000
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
-#define load4_u(buf, ofs) load_float4((buf) + (ofs))
-
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx,
- int dy,
- const float *ccl_restrict weight_image,
- const float *ccl_restrict variance_image,
- const float *ccl_restrict scale_image,
- float *difference_image,
- int4 rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
- /* Strides need to be aligned to 16 bytes. */
- kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
- int aligned_lowx = rect.x & (~3);
- const int numChannels = (channel_offset > 0) ? 3 : 1;
- const float4 channel_fac = make_float4(1.0f / numChannels);
-
- for (int y = rect.y; y < rect.w; y++) {
- int idx_p = y * stride + aligned_lowx;
- int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
- for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
- float4 diff = make_float4(0.0f);
- float4 scale_fac;
- if (scale_image) {
- scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
- make_float4(0.25f),
- make_float4(4.0f));
- }
- else {
- scale_fac = make_float4(1.0f);
- }
- for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
- /* idx_p is guaranteed to be aligned, but idx_q isn't. */
- float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
- float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
- float4 cdiff = color_p - color_q;
- float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
- float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
- diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
- (make_float4(1e-8f) + k_2 * (var_p + var_q));
- }
- load4_a(difference_image, idx_p) = diff * channel_fac;
- }
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_blur(
- const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
- int aligned_lowx = round_down(rect.x, 4);
- for (int y = rect.y; y < rect.w; y++) {
- const int low = max(rect.y, y - f);
- const int high = min(rect.w, y + f + 1);
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y * stride + x) = make_float4(0.0f);
- }
- for (int y1 = low; y1 < high; y1++) {
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
- }
- }
- float fac = 1.0f / (high - low);
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y * stride + x) *= fac;
- }
- }
-}
-
-ccl_device_inline void nlm_blur_horizontal(
- const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
- int aligned_lowx = round_down(rect.x, 4);
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y * stride + x) = make_float4(0.0f);
- }
- }
-
- for (int dx = -f; dx <= f; dx++) {
- aligned_lowx = round_down(rect.x - min(0, dx), 4);
- int highx = rect.z - max(0, dx);
- int4 lowx4 = make_int4(rect.x - min(0, dx));
- int4 highx4 = make_int4(rect.z - max(0, dx));
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = aligned_lowx; x < highx; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= lowx4) & (x4 < highx4);
-
- float4 diff = load4_u(difference_image, y * stride + x + dx);
- load4_a(out_image, y * stride + x) += mask(active, diff);
- }
- }
- }
-
- aligned_lowx = round_down(rect.x, 4);
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
- float4 low = max(make_float4(rect.x), x4 - make_float4(f));
- float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
- load4_a(out_image, y * stride + x) *= rcp(high - low);
- }
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_weight(
- const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
-{
- nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
- int aligned_lowx = round_down(rect.x, 4);
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y * stride + x) = fast_expf4(
- -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
- }
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_update_output(int dx,
- int dy,
- const float *ccl_restrict difference_image,
- const float *ccl_restrict image,
- float *temp_image,
- float *out_image,
- float *accum_image,
- int4 rect,
- int channel_offset,
- int stride,
- int f)
-{
- nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
-
- int aligned_lowx = round_down(rect.x, 4);
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = aligned_lowx; x < rect.z; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
-
- int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
-
- float4 weight = load4_a(temp_image, idx_p);
- load4_a(accum_image, idx_p) += mask(active, weight);
-
- float4 val = load4_u(image, idx_q);
- if (channel_offset) {
- val += load4_u(image, idx_q + channel_offset);
- val += load4_u(image, idx_q + 2 * channel_offset);
- val *= 1.0f / 3.0f;
- }
-
- load4_a(out_image, idx_p) += mask(active, weight * val);
- }
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
- int dy,
- int t,
- const float *ccl_restrict
- difference_image,
- const float *ccl_restrict buffer,
- float *transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int4 rect,
- int4 filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time)
-{
- int4 clip_area = rect_clip(rect, filter_window);
- /* fx and fy are in filter-window-relative coordinates,
- * while x and y are in feature-window-relative coordinates. */
- for (int y = clip_area.y; y < clip_area.w; y++) {
- for (int x = clip_area.x; x < clip_area.z; x++) {
- const int low = max(rect.x, x - f);
- const int high = min(rect.z, x + f + 1);
- float sum = 0.0f;
- for (int x1 = low; x1 < high; x1++) {
- sum += difference_image[y * stride + x1];
- }
- float weight = sum * (1.0f / (high - low));
-
- int storage_ofs = coord_to_local_index(filter_window, x, y);
- float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
- float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
- float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
- int *l_rank = rank + storage_ofs;
-
- kernel_filter_construct_gramian(x,
- y,
- 1,
- dx,
- dy,
- t,
- stride,
- pass_stride,
- frame_offset,
- use_time,
- buffer,
- l_transform,
- l_rank,
- weight,
- l_XtWX,
- l_XtWY,
- 0);
- }
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
- const float *ccl_restrict accum_image,
- int4 rect,
- int w)
-{
- for (int y = rect.y; y < rect.w; y++) {
- for (int x = rect.x; x < rect.z; x++) {
- out_image[y * w + x] /= accum_image[y * w + x];
- }
- }
-}
-
-#undef load4_a
-#undef load4_u
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
deleted file mode 100644
index 650c743f34f..00000000000
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Determines pixel coordinates and offset for the current thread.
- * Returns whether the thread should do any work.
- *
- * All coordinates are relative to the denoising buffer!
- *
- * Window is the rect that should be processed.
- * co is filled with (x, y, dx, dy).
- */
-ccl_device_inline bool get_nlm_coords_window(
- int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window)
-{
- /* Determine the pixel offset that this thread should apply. */
- int s = 2 * r + 1;
- int si = ccl_global_id(1);
- int sx = si % s;
- int sy = si / s;
- if (sy >= s) {
- return false;
- }
-
- /* Pixels still need to lie inside the denoising buffer after applying the offset,
- * so determine the area for which this is the case. */
- int dx = sx - r;
- int dy = sy - r;
-
- *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy));
-
- /* Find the intersection of the area that we want to process (window) and the area
- * that can be processed (rect) to get the final area for this offset. */
- int4 clip_area = rect_clip(window, *rect);
-
- /* If the radius is larger than one of the sides of the window,
- * there will be shifts for which there is no usable pixel at all. */
- if (!rect_is_valid(clip_area)) {
- return false;
- }
-
- /* Map the linear thread index to pixels inside the clip area. */
- int x, y;
- if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
- return false;
- }
-
- *co = make_int4(x, y, dx, dy);
-
- *ofs = (sy * s + sx) * stride;
-
- return true;
-}
-
-ccl_device_inline bool get_nlm_coords(
- int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs)
-{
- return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_difference(
- int x,
- int y,
- int dx,
- int dy,
- const ccl_global float *ccl_restrict weight_image,
- const ccl_global float *ccl_restrict variance_image,
- const ccl_global float *ccl_restrict scale_image,
- ccl_global float *difference_image,
- int4 rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
- int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset;
- int numChannels = channel_offset ? 3 : 1;
-
- float diff = 0.0f;
- float scale_fac = 1.0f;
- if (scale_image) {
- scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
- }
-
- for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
- float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q];
- float pvar = variance_image[idx_p];
- float qvar = sqr(scale_fac) * variance_image[idx_q];
- diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar));
- }
- if (numChannels > 1) {
- diff *= 1.0f / numChannels;
- }
- difference_image[y * stride + x] = diff;
-}
-
-ccl_device_inline void kernel_filter_nlm_blur(int x,
- int y,
- const ccl_global float *ccl_restrict
- difference_image,
- ccl_global float *out_image,
- int4 rect,
- int stride,
- int f)
-{
- float sum = 0.0f;
- const int low = max(rect.y, y - f);
- const int high = min(rect.w, y + f + 1);
- for (int y1 = low; y1 < high; y1++) {
- sum += difference_image[y1 * stride + x];
- }
- sum *= 1.0f / (high - low);
- out_image[y * stride + x] = sum;
-}
-
-ccl_device_inline void kernel_filter_nlm_calc_weight(int x,
- int y,
- const ccl_global float *ccl_restrict
- difference_image,
- ccl_global float *out_image,
- int4 rect,
- int stride,
- int f)
-{
- float sum = 0.0f;
- const int low = max(rect.x, x - f);
- const int high = min(rect.z, x + f + 1);
- for (int x1 = low; x1 < high; x1++) {
- sum += difference_image[y * stride + x1];
- }
- sum *= 1.0f / (high - low);
- out_image[y * stride + x] = fast_expf(-max(sum, 0.0f));
-}
-
-ccl_device_inline void kernel_filter_nlm_update_output(int x,
- int y,
- int dx,
- int dy,
- const ccl_global float *ccl_restrict
- difference_image,
- const ccl_global float *ccl_restrict image,
- ccl_global float *out_image,
- ccl_global float *accum_image,
- int4 rect,
- int channel_offset,
- int stride,
- int f)
-{
- float sum = 0.0f;
- const int low = max(rect.x, x - f);
- const int high = min(rect.z, x + f + 1);
- for (int x1 = low; x1 < high; x1++) {
- sum += difference_image[y * stride + x1];
- }
- sum *= 1.0f / (high - low);
-
- int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
- if (out_image) {
- atomic_add_and_fetch_float(accum_image + idx_p, sum);
-
- float val = image[idx_q];
- if (channel_offset) {
- val += image[idx_q + channel_offset];
- val += image[idx_q + 2 * channel_offset];
- val *= 1.0f / 3.0f;
- }
- atomic_add_and_fetch_float(out_image + idx_p, sum * val);
- }
- else {
- accum_image[idx_p] = sum;
- }
-}
-
-ccl_device_inline void kernel_filter_nlm_construct_gramian(
- int x,
- int y,
- int dx,
- int dy,
- int t,
- const ccl_global float *ccl_restrict difference_image,
- const ccl_global float *ccl_restrict buffer,
- const ccl_global float *ccl_restrict transform,
- ccl_global int *rank,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int4 rect,
- int4 filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time,
- int localIdx)
-{
- const int low = max(rect.x, x - f);
- const int high = min(rect.z, x + f + 1);
- float sum = 0.0f;
- for (int x1 = low; x1 < high; x1++) {
- sum += difference_image[y * stride + x1];
- }
- float weight = sum * (1.0f / (high - low));
-
- /* Reconstruction data is only stored for pixels inside the filter window,
- * so compute the pixel's index in there. */
- int storage_ofs = coord_to_local_index(filter_window, x, y);
- transform += storage_ofs;
- rank += storage_ofs;
- XtWX += storage_ofs;
- XtWY += storage_ofs;
-
- kernel_filter_construct_gramian(x,
- y,
- rect_size(filter_window),
- dx,
- dy,
- t,
- stride,
- pass_stride,
- frame_offset,
- use_time,
- buffer,
- transform,
- rank,
- weight,
- XtWX,
- XtWY,
- localIdx);
-}
-
-ccl_device_inline void kernel_filter_nlm_normalize(int x,
- int y,
- ccl_global float *out_image,
- const ccl_global float *ccl_restrict
- accum_image,
- int stride)
-{
- out_image[y * stride + x] /= accum_image[y * stride + x];
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
deleted file mode 100644
index 97cecba190e..00000000000
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/**
- * First step of the shadow prefiltering, performs the shadow division and stores all data
- * in a nice and easy rectangular array that can be passed to the NLM filter.
- *
- * Calculates:
- * \param unfiltered: Contains the two half images of the shadow feature pass
- * \param sampleVariance: The sample-based variance calculated in the kernel.
- * Note: This calculation is biased in general,
- * and especially here since the variance of the ratio can only be approximated.
- * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy
- * (since it's essentially the buffer variance of the two variance halves)
- * \param bufferVariance: The buffer-based variance of the shadow feature.
- * Unbiased, but quite noisy.
- */
-ccl_device void kernel_filter_divide_shadow(int sample,
- CCL_FILTER_TILE_INFO,
- int x,
- int y,
- ccl_global float *unfilteredA,
- ccl_global float *unfilteredB,
- ccl_global float *sampleVariance,
- ccl_global float *sampleVarianceV,
- ccl_global float *bufferVariance,
- int4 rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
- int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
- int tile = ytile * 3 + xtile;
-
- int offset = tile_info->offsets[tile];
- int stride = tile_info->strides[tile];
- const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer(
- tile);
- center_buffer += (y * stride + x + offset) * buffer_pass_stride;
- center_buffer += buffer_denoising_offset + 14;
-
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y - rect.y) * buffer_w + (x - rect.x);
- unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
- unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
-
- float varA = center_buffer[2];
- float varB = center_buffer[5];
- int odd_sample = (sample + 1) / 2;
- int even_sample = sample / 2;
-
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample);
- varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample);
-
- varA /= max(odd_sample - 1, 1);
- varB /= max(even_sample - 1, 1);
-
- sampleVariance[idx] = 0.5f * (varA + varB) / sample;
- sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample);
- bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) *
- (unfilteredA[idx] - unfilteredB[idx]);
-}
-
-/* Load a regular feature from the render buffers into the denoise buffer.
- * Parameters:
- * - sample: The sample amount in the buffer, used to normalize the buffer.
- * - m_offset, v_offset: Render Buffer Pass offsets of mean and variance of the feature.
- * - x, y: Current pixel
- * - mean, variance: Target denoise buffers.
- * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
- */
-ccl_device void kernel_filter_get_feature(int sample,
- CCL_FILTER_TILE_INFO,
- int m_offset,
- int v_offset,
- int x,
- int y,
- ccl_global float *mean,
- ccl_global float *variance,
- float scale,
- int4 rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
- int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
- int tile = ytile * 3 + xtile;
- ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) +
- (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) *
- buffer_pass_stride +
- buffer_denoising_offset;
-
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
- float val = scale * center_buffer[m_offset];
- mean[idx] = val;
-
- if (v_offset >= 0) {
- if (sample > 1) {
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- variance[idx] = max(
- 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1)));
- }
- else {
- /* Can't compute variance with single sample, just set it very high. */
- variance[idx] = 1e10f;
- }
- }
-}
-
-ccl_device void kernel_filter_write_feature(int sample,
- int x,
- int y,
- int4 buffer_params,
- ccl_global float *from,
- ccl_global float *buffer,
- int out_offset,
- int4 rect)
-{
- ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
- buffer_params.z;
-
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
- combined_buffer[out_offset] = from[idx];
-}
-
-#define GET_COLOR(image) \
- make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride])
-#define SET_COLOR(image, color) \
- image[idx] = color.x; \
- image[idx + pass_stride] = color.y; \
- image[idx + 2 * pass_stride] = color.z
-
-ccl_device void kernel_filter_detect_outliers(int x,
- int y,
- ccl_global float *in,
- ccl_global float *variance_out,
- ccl_global float *depth,
- ccl_global float *image_out,
- int4 rect,
- int pass_stride)
-{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
- ccl_global float *image_in = in;
- ccl_global float *variance_in = in + 3 * pass_stride;
-
- int n = 0;
- float values[25];
- float pixel_variance, max_variance = 0.0f;
- for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
- for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
- int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
- float3 color = GET_COLOR(image_in);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- float L = average(color);
-
- /* Find the position of L. */
- int i;
- for (i = 0; i < n; i++) {
- if (values[i] > L)
- break;
- }
- /* Make space for L by shifting all following values to the right. */
- for (int j = n; j > i; j--) {
- values[j] = values[j - 1];
- }
- /* Insert L. */
- values[i] = L;
- n++;
-
- float3 pixel_var = GET_COLOR(variance_in);
- float var = average(pixel_var);
- if ((x1 == x) && (y1 == y)) {
- pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
- var;
- }
- else {
- max_variance = max(max_variance, var);
- }
- }
- }
-
- max_variance += 1e-4f;
-
- int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
- float3 color = GET_COLOR(image_in);
- float3 variance = GET_COLOR(variance_in);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- variance = max(variance, make_float3(0.0f, 0.0f, 0.0f));
-
- float L = average(color);
-
- float ref = 2.0f * values[(int)(n * 0.75f)];
-
- /* Slightly offset values to avoid false positives in (almost) black areas. */
- max_variance += 1e-5f;
- ref -= 1e-5f;
-
- if (L > ref) {
- /* The pixel appears to be an outlier.
- * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is
- * that the pixel should actually be at the reference value: If the reference is within the
- * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very
- * unlikely that the pixel should be darker, which indicates a legitimate highlight.
- */
-
- if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
- depth[idx] = -depth[idx];
- color *= ref / L;
- variance = make_float3(max_variance, max_variance, max_variance);
- }
- else {
- float stddev = sqrtf(pixel_variance);
- if (L - 3 * stddev < ref) {
- /* The pixel is an outlier, so negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM
- * weights. */
- depth[idx] = -depth[idx];
- float fac = ref / L;
- color *= fac;
- variance *= sqr(fac);
- }
- }
- }
-
- /* Apply log(1+x) transform to compress highlights and avoid halos in the denoised results.
- * Variance is transformed accordingly - the derivative of the transform is 1/(1+x), so we
- * scale by the square of that (since we have variance instead of standard deviation). */
- color = color_highlight_compress(color, &variance);
-
- SET_COLOR(image_out, color);
- SET_COLOR(variance_out, variance);
-}
-
-#undef GET_COLOR
-#undef SET_COLOR
-
-/* Combine A/B buffers.
- * Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x,
- int y,
- ccl_global float *mean,
- ccl_global float *variance,
- ccl_global float *a,
- ccl_global float *b,
- int4 rect,
- int r)
-{
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y - rect.y) * buffer_w + (x - rect.x);
-
- if (mean)
- mean[idx] = 0.5f * (a[idx] + b[idx]);
- if (variance) {
- if (r == 0)
- variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]);
- else {
- variance[idx] = 0.0f;
- float values[25];
- int numValues = 0;
- for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) {
- for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) {
- int pidx = (py - rect.y) * buffer_w + (px - rect.x);
- values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]);
- }
- }
- /* Insertion-sort the variances (fast enough for 25 elements). */
- for (int i = 1; i < numValues; i++) {
- float v = values[i];
- int j;
- for (j = i - 1; j >= 0 && values[j] > v; j--)
- values[j + 1] = values[j];
- values[j + 1] = v;
- }
- variance[idx] = values[(7 * numValues) / 8];
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
deleted file mode 100644
index 17941689ad5..00000000000
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device_inline void kernel_filter_construct_gramian(int x,
- int y,
- int storage_stride,
- int dx,
- int dy,
- int t,
- int buffer_stride,
- int pass_stride,
- int frame_offset,
- bool use_time,
- const ccl_global float *ccl_restrict buffer,
- const ccl_global float *ccl_restrict
- transform,
- ccl_global int *rank,
- float weight,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int localIdx)
-{
- if (weight < 1e-3f) {
- return;
- }
-
- int p_offset = y * buffer_stride + x;
- int q_offset = (y + dy) * buffer_stride + (x + dx) + frame_offset;
-
-#ifdef __KERNEL_GPU__
- const int stride = storage_stride;
-#else
- const int stride = 1;
- (void)storage_stride;
-#endif
-
-#ifdef __KERNEL_CUDA__
- ccl_local float shared_design_row[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE];
- ccl_local_param float *design_row = shared_design_row + localIdx * (DENOISE_FEATURES + 1);
-#else
- float design_row[DENOISE_FEATURES + 1];
-#endif
-
- float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
-
- /* If the pixel was flagged as an outlier during prefiltering, skip it. */
- if (ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
- return;
- }
-
- filter_get_design_row_transform(make_int3(x, y, t),
- buffer + p_offset,
- make_int3(x + dx, y + dy, t),
- buffer + q_offset,
- pass_stride,
- *rank,
- design_row,
- transform,
- stride,
- use_time);
-
-#ifdef __KERNEL_GPU__
- math_trimatrix_add_gramian_strided(XtWX, (*rank) + 1, design_row, weight, stride);
- math_vec3_add_strided(XtWY, (*rank) + 1, design_row, weight * q_color, stride);
-#else
- math_trimatrix_add_gramian(XtWX, (*rank) + 1, design_row, weight);
- math_vec3_add(XtWY, (*rank) + 1, design_row, weight * q_color);
-#endif
-}
-
-ccl_device_inline void kernel_filter_finalize(int x,
- int y,
- ccl_global float *buffer,
- ccl_global int *rank,
- int storage_stride,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int4 buffer_params,
- int sample)
-{
-#ifdef __KERNEL_GPU__
- const int stride = storage_stride;
-#else
- const int stride = 1;
- (void)storage_stride;
-#endif
-
- if (XtWX[0] < 1e-3f) {
- /* There is not enough information to determine a denoised result.
- * As a fallback, keep the original value of the pixel. */
- return;
- }
-
- /* The weighted average of pixel colors (essentially, the NLM-filtered image).
- * In case the solution of the linear model fails due to numerical issues or
- * returns nonsensical negative values, fall back to this value. */
- float3 mean_color = XtWY[0] / XtWX[0];
-
- math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);
-
- float3 final_color = XtWY[0];
- if (!isfinite3_safe(final_color) ||
- (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) {
- final_color = mean_color;
- }
-
- /* Clamp pixel value to positive values and reverse the highlight compression transform. */
- final_color = color_highlight_uncompress(max(final_color, make_float3(0.0f, 0.0f, 0.0f)));
-
- ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
- buffer_params.z;
- if (buffer_params.w >= 0) {
- final_color *= sample;
- if (buffer_params.w > 0) {
- final_color.x += combined_buffer[buffer_params.w + 0];
- final_color.y += combined_buffer[buffer_params.w + 1];
- final_color.z += combined_buffer[buffer_params.w + 2];
- }
- }
- combined_buffer[0] = final_color.x;
- combined_buffer[1] = final_color.y;
- combined_buffer[2] = final_color.z;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
deleted file mode 100644
index 880a661214e..00000000000
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
- CCL_FILTER_TILE_INFO,
- int x,
- int y,
- int4 rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- float *transform,
- int *rank,
- int radius,
- float pca_threshold)
-{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
- float features[DENOISE_FEATURES];
-
- const float *ccl_restrict pixel_buffer;
- int3 pixel;
-
- int num_features = use_time ? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
- /* === Shift feature passes to have mean 0. === */
- float feature_means[DENOISE_FEATURES];
- math_vector_zero(feature_means, num_features);
- FOR_PIXEL_WINDOW
- {
- filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add(feature_means, features, num_features);
- }
- END_FOR_PIXEL_WINDOW
-
- math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
- /* === Scale the shifted feature passes to a range of [-1; 1] ===
- * Will be baked into the transform later. */
- float feature_scale[DENOISE_FEATURES];
- math_vector_zero(feature_scale, num_features);
-
- FOR_PIXEL_WINDOW
- {
- filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max(feature_scale, features, num_features);
- }
- END_FOR_PIXEL_WINDOW
-
- filter_calculate_scale(feature_scale, use_time);
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimensional feature space to a reduced feature
- * (r-feature) space which generally has fewer dimensions.
- * This mainly helps to prevent over-fitting. */
- float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
- math_matrix_zero(feature_matrix, num_features);
- FOR_PIXEL_WINDOW
- {
- filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul(features, feature_scale, num_features);
- math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
- }
- END_FOR_PIXEL_WINDOW
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
- *rank = 0;
- /* Prevent over-fitting when a small window is used. */
- int max_rank = min(num_features, num_pixels / 3);
- if (pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for (int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i * num_features + i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- if (i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i * num_features + i];
- reduced_energy += s;
- }
- }
- else {
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i * num_features + i];
- if (i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- /* Bake the feature scaling into the transformation matrix. */
- for (int i = 0; i < (*rank); i++) {
- math_vector_mul(transform + i * num_features, feature_scale, num_features);
- }
- math_matrix_transpose(transform, num_features, 1);
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h
deleted file mode 100644
index ec258a5212a..00000000000
--- a/intern/cycles/kernel/filter/filter_transform_gpu.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
- CCL_FILTER_TILE_INFO,
- int x,
- int y,
- int4 rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- ccl_global float *transform,
- ccl_global int *rank,
- int radius,
- float pca_threshold,
- int transform_stride,
- int localIdx)
-{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
-#ifdef __KERNEL_CUDA__
- ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE];
- ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES;
-#else
- float features[DENOISE_FEATURES];
-#endif
-
- int num_features = use_time ? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
- const ccl_global float *ccl_restrict pixel_buffer;
- int3 pixel;
-
- /* === Shift feature passes to have mean 0. === */
- float feature_means[DENOISE_FEATURES];
- math_vector_zero(feature_means, num_features);
- FOR_PIXEL_WINDOW
- {
- filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add(feature_means, features, num_features);
- }
- END_FOR_PIXEL_WINDOW
-
- math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
- /* === Scale the shifted feature passes to a range of [-1; 1] ===
- * Will be baked into the transform later. */
- float feature_scale[DENOISE_FEATURES];
- math_vector_zero(feature_scale, num_features);
-
- FOR_PIXEL_WINDOW
- {
- filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max(feature_scale, features, num_features);
- }
- END_FOR_PIXEL_WINDOW
-
- filter_calculate_scale(feature_scale, use_time);
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimensional feature space to a reduced feature
- * (r-feature) space which generally has fewer dimensions.
- * This mainly helps to prevent over-fitting. */
- float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
- math_matrix_zero(feature_matrix, num_features);
- FOR_PIXEL_WINDOW
- {
- filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul(features, feature_scale, num_features);
- math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
- }
- END_FOR_PIXEL_WINDOW
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
- *rank = 0;
- /* Prevent over-fitting when a small window is used. */
- int max_rank = min(num_features, num_pixels / 3);
- if (pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for (int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i * num_features + i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- if (i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i * num_features + i];
- reduced_energy += s;
- }
- }
- else {
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i * num_features + i];
- if (i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- math_matrix_transpose(transform, num_features, transform_stride);
-
- /* Bake the feature scaling into the transformation matrix. */
- for (int i = 0; i < num_features; i++) {
- for (int j = 0; j < (*rank); j++) {
- transform[(i * num_features + j) * transform_stride] *= feature_scale[i];
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h
deleted file mode 100644
index 0304d990f9f..00000000000
--- a/intern/cycles/kernel/filter/filter_transform_sse.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
- CCL_FILTER_TILE_INFO,
- int x,
- int y,
- int4 rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- float *transform,
- int *rank,
- int radius,
- float pca_threshold)
-{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
- float4 features[DENOISE_FEATURES];
- const float *ccl_restrict pixel_buffer;
- int3 pixel;
-
- int num_features = use_time ? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
- /* === Shift feature passes to have mean 0. === */
- float4 feature_means[DENOISE_FEATURES];
- math_vector_zero_sse(feature_means, num_features);
- FOR_PIXEL_WINDOW_SSE
- {
- filter_get_features_sse(
- x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add_sse(feature_means, num_features, features);
- }
- END_FOR_PIXEL_WINDOW_SSE
-
- float4 pixel_scale = make_float4(1.0f / num_pixels);
- for (int i = 0; i < num_features; i++) {
- feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
- }
-
- /* === Scale the shifted feature passes to a range of [-1; 1] ===
- * Will be baked into the transform later. */
- float4 feature_scale[DENOISE_FEATURES];
- math_vector_zero_sse(feature_scale, num_features);
- FOR_PIXEL_WINDOW_SSE
- {
- filter_get_feature_scales_sse(
- x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max_sse(feature_scale, features, num_features);
- }
- END_FOR_PIXEL_WINDOW_SSE
-
- filter_calculate_scale_sse(feature_scale, use_time);
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimensional feature space to a reduced feature
- * (r-feature) space which generally has fewer dimensions.
- * This mainly helps to prevent over-fitting. */
- float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
- math_matrix_zero_sse(feature_matrix_sse, num_features);
- FOR_PIXEL_WINDOW_SSE
- {
- filter_get_features_sse(
- x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul_sse(features, num_features, feature_scale);
- math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
- }
- END_FOR_PIXEL_WINDOW_SSE
-
- float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
- math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
-
- *rank = 0;
- /* Prevent over-fitting when a small window is used. */
- int max_rank = min(num_features, num_pixels / 3);
- if (pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for (int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i * num_features + i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- if (i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i * num_features + i];
- reduced_energy += s;
- }
- }
- else {
- for (int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i * num_features + i];
- if (i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- math_matrix_transpose(transform, num_features, 1);
-
- /* Bake the feature scaling into the transformation matrix. */
- for (int i = 0; i < num_features; i++) {
- math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank);
- }
-}
-
-CCL_NAMESPACE_END