intern/cycles/kernel/filter/filter_reconstruction.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

/*
 * Copyright 2011-2017 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

CCL_NAMESPACE_BEGIN

/* Accumulate the contribution of one neighbor pixel q = (x + dx, y + dy) into the
 * weighted least-squares system of the center pixel p = (x, y).
 *
 * The design row for the pair (p, q) is built through filter_get_design_row_transform()
 * and is then added, scaled by `weight`, to the triangular Gramian XtWX and to the
 * right-hand side XtWY (the latter additionally multiplied by the color of q).
 *
 * Nothing is accumulated when `weight` is below 1e-3 or when feature 0 of q is negative.
 *
 * On GPU builds XtWX/XtWY are accessed with `storage_stride`; otherwise a stride of 1 is
 * used and `storage_stride` is ignored. `localIdx` only selects the design-row slot in
 * the ccl_local array on CUDA builds. */
ccl_device_inline void kernel_filter_construct_gramian(int x,
                                                       int y,
                                                       int storage_stride,
                                                       int dx,
                                                       int dy,
                                                       int t,
                                                       int buffer_stride,
                                                       int pass_stride,
                                                       int frame_offset,
                                                       bool use_time,
                                                       const ccl_global float *ccl_restrict buffer,
                                                       const ccl_global float *ccl_restrict
                                                           transform,
                                                       ccl_global int *rank,
                                                       float weight,
                                                       ccl_global float *XtWX,
                                                       ccl_global float3 *XtWY,
                                                       int localIdx)
{
  /* Contributions with a tiny weight are dropped entirely. */
  if (weight < 1e-3f) {
    return;
  }

  /* Locate the center pixel and the neighbor pixel inside the buffer. */
  const int center_index = y * buffer_stride + x;
  const int neighbor_index = (y + dy) * buffer_stride + (x + dx) + frame_offset;
  const ccl_global float *center_pixel = buffer + center_index;
  const ccl_global float *neighbor_pixel = buffer + neighbor_index;

#ifdef __KERNEL_GPU__
  const int stride = storage_stride;
#else
  const int stride = 1;
  (void)storage_stride;
#endif

#ifdef __KERNEL_CUDA__
  /* One design row slot per local index, carved out of a single ccl_local array. */
  ccl_local float design_row_pool[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE];
  ccl_local_param float *design_row = design_row_pool + localIdx * (DENOISE_FEATURES + 1);
#else
  float design_row[DENOISE_FEATURES + 1];
#endif

  float3 neighbor_color = filter_get_color(neighbor_pixel, pass_stride);

  /* A negative first feature marks a pixel that prefiltering flagged as an outlier;
   * such pixels do not take part in the fit. */
  if (ccl_get_feature(neighbor_pixel, 0) < 0.0f) {
    return;
  }

  filter_get_design_row_transform(make_int3(x, y, t),
                                  center_pixel,
                                  make_int3(x + dx, y + dy, t),
                                  neighbor_pixel,
                                  pass_stride,
                                  *rank,
                                  design_row,
                                  transform,
                                  stride,
                                  use_time);

  /* The system has one more dimension than the rank. */
  const int system_size = (*rank) + 1;
  const float3 weighted_color = weight * neighbor_color;

#ifdef __KERNEL_GPU__
  math_trimatrix_add_gramian_strided(XtWX, system_size, design_row, weight, stride);
  math_vec3_add_strided(XtWY, system_size, design_row, weighted_color, stride);
#else
  math_trimatrix_add_gramian(XtWX, system_size, design_row, weight);
  math_vec3_add(XtWY, system_size, design_row, weighted_color);
#endif
}

/* Solve the accumulated weighted least-squares system of pixel (x, y) and store the
 * resulting color in `buffer`.
 *
 * If XtWX[0] is below 1e-3 the function returns without touching the buffer. Otherwise
 * the system is solved in place via math_trimatrix_vec3_solve(); a non-finite solution,
 * or one with any component below -0.01, is replaced by XtWY[0] / XtWX[0] as sampled
 * before the solve. The color is then clamped to be non-negative and passed through
 * color_highlight_uncompress().
 *
 * The destination is buffer + (y * buffer_params.y + x + buffer_params.x) * buffer_params.z.
 * With buffer_params.w >= 0 the color is multiplied by `sample`; with buffer_params.w > 0
 * the three floats found at that offset within the destination pixel are added as well. */
ccl_device_inline void kernel_filter_finalize(int x,
                                              int y,
                                              ccl_global float *buffer,
                                              ccl_global int *rank,
                                              int storage_stride,
                                              ccl_global float *XtWX,
                                              ccl_global float3 *XtWY,
                                              int4 buffer_params,
                                              int sample)
{
#ifdef __KERNEL_GPU__
  const int stride = storage_stride;
#else
  const int stride = 1;
  (void)storage_stride;
#endif

  /* Too little accumulated weight to produce a denoised value:
   * leave the pixel in the buffer as it is. */
  const float weight_sum = XtWX[0];
  if (weight_sum < 1e-3f) {
    return;
  }

  /* Weighted mean of the contributing colors, captured before the solver overwrites
   * XtWY. It serves as the fallback when the solution is unusable. */
  const float3 mean_color = XtWY[0] / weight_sum;

  math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);

  float3 final_color = XtWY[0];
  if (!isfinite3_safe(final_color)) {
    final_color = mean_color;
  }
  else {
    const bool has_negative = (final_color.x < -0.01f || final_color.y < -0.01f ||
                               final_color.z < -0.01f);
    if (has_negative) {
      final_color = mean_color;
    }
  }

  /* Remove any remaining negative components, then undo the highlight compression. */
  const float3 zero_color = make_float3(0.0f, 0.0f, 0.0f);
  final_color = color_highlight_uncompress(max(final_color, zero_color));

  const int pixel_index = y * buffer_params.y + x + buffer_params.x;
  ccl_global float *combined_buffer = buffer + pixel_index * buffer_params.z;

  const int extra_offset = buffer_params.w;
  if (extra_offset >= 0) {
    final_color *= sample;
  }
  if (extra_offset > 0) {
    /* Add the three floats stored at this offset inside the same pixel. */
    final_color.x += combined_buffer[extra_offset + 0];
    final_color.y += combined_buffer[extra_offset + 1];
    final_color.z += combined_buffer[extra_offset + 2];
  }

  combined_buffer[0] = final_color.x;
  combined_buffer[1] = final_color.y;
  combined_buffer[2] = final_color.z;
}

CCL_NAMESPACE_END