/* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ CCL_NAMESPACE_BEGIN /** * First step of the shadow prefiltering, performs the shadow division and stores all data * in a nice and easy rectangular array that can be passed to the NLM filter. * * Calculates: * \param unfiltered: Contains the two half images of the shadow feature pass * \param sampleVariance: The sample-based variance calculated in the kernel. * Note: This calculation is biased in general, * and especially here since the variance of the ratio can only be approximated. * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy * (since it's essentially the buffer variance of the two variance halves) * \param bufferVariance: The buffer-based variance of the shadow feature. * Unbiased, but quite noisy. */ ccl_device void kernel_filter_divide_shadow(int sample, CCL_FILTER_TILE_INFO, int x, int y, ccl_global float *unfilteredA, ccl_global float *unfilteredB, ccl_global float *sampleVariance, ccl_global float *sampleVarianceV, ccl_global float *bufferVariance, int4 rect, int buffer_pass_stride, int buffer_denoising_offset) { int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); int tile = ytile * 3 + xtile; int offset = tile_info->offsets[tile]; int stride = tile_info->strides[tile]; const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer( tile); center_buffer += (y * stride + x + offset) * buffer_pass_stride; center_buffer += buffer_denoising_offset + 14; int buffer_w = align_up(rect.z - rect.x, 4); int idx = (y - rect.y) * buffer_w + (x - rect.x); unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f); unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f); float varA = center_buffer[2]; float varB = center_buffer[5]; int odd_sample = (sample + 1) / 2; int even_sample = sample / 2; /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance * update does not work efficiently with atomics in the kernel. */ varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample); varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample); varA /= max(odd_sample - 1, 1); varB /= max(even_sample - 1, 1); sampleVariance[idx] = 0.5f * (varA + varB) / sample; sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample); bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]); } /* Load a regular feature from the render buffers into the denoise buffer. * Parameters: * - sample: The sample amount in the buffer, used to normalize the buffer. * - m_offset, v_offset: Render Buffer Pass offsets of mean and variance of the feature. * - x, y: Current pixel * - mean, variance: Target denoise buffers. * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive). */ ccl_device void kernel_filter_get_feature(int sample, CCL_FILTER_TILE_INFO, int m_offset, int v_offset, int x, int y, ccl_global float *mean, ccl_global float *variance, float scale, int4 rect, int buffer_pass_stride, int buffer_denoising_offset) { int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); int tile = ytile * 3 + xtile; ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) * buffer_pass_stride + buffer_denoising_offset; int buffer_w = align_up(rect.z - rect.x, 4); int idx = (y - rect.y) * buffer_w + (x - rect.x); float val = scale * center_buffer[m_offset]; mean[idx] = val; if (v_offset >= 0) { if (sample > 1) { /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance * update does not work efficiently with atomics in the kernel. */ variance[idx] = max( 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1))); } else { /* Can't compute variance with single sample, just set it very high. */ variance[idx] = 1e10f; } } } ccl_device void kernel_filter_write_feature(int sample, int x, int y, int4 buffer_params, ccl_global float *from, ccl_global float *buffer, int out_offset, int4 rect) { ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) * buffer_params.z; int buffer_w = align_up(rect.z - rect.x, 4); int idx = (y - rect.y) * buffer_w + (x - rect.x); combined_buffer[out_offset] = from[idx]; } #define GET_COLOR(image) \ make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]) #define SET_COLOR(image, color) \ image[idx] = color.x; \ image[idx + pass_stride] = color.y; \ image[idx + 2 * pass_stride] = color.z ccl_device void kernel_filter_detect_outliers(int x, int y, ccl_global float *in, ccl_global float *variance_out, ccl_global float *depth, ccl_global float *image_out, int4 rect, int pass_stride) { int buffer_w = align_up(rect.z - rect.x, 4); ccl_global float *image_in = in; ccl_global float *variance_in = in + 3 * pass_stride; int n = 0; float values[25]; float pixel_variance, max_variance = 0.0f; for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) { for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) { int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x); float3 color = GET_COLOR(image_in); color = max(color, make_float3(0.0f, 0.0f, 0.0f)); float L = average(color); /* Find the position of L. */ int i; for (i = 0; i < n; i++) { if (values[i] > L) break; } /* Make space for L by shifting all following values to the right. */ for (int j = n; j > i; j--) { values[j] = values[j - 1]; } /* Insert L. */ values[i] = L; n++; float3 pixel_var = GET_COLOR(variance_in); float var = average(pixel_var); if ((x1 == x) && (y1 == y)) { pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f : var; } else { max_variance = max(max_variance, var); } } } max_variance += 1e-4f; int idx = (y - rect.y) * buffer_w + (x - rect.x); float3 color = GET_COLOR(image_in); float3 variance = GET_COLOR(variance_in); color = max(color, make_float3(0.0f, 0.0f, 0.0f)); variance = max(variance, make_float3(0.0f, 0.0f, 0.0f)); float L = average(color); float ref = 2.0f * values[(int)(n * 0.75f)]; /* Slightly offset values to avoid false positives in (almost) black areas. */ max_variance += 1e-5f; ref -= 1e-5f; if (L > ref) { /* The pixel appears to be an outlier. * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is * that the pixel should actually be at the reference value: If the reference is within the * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very * unlikely that the pixel should be darker, which indicates a legitimate highlight. */ if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { depth[idx] = -depth[idx]; color *= ref / L; variance = make_float3(max_variance, max_variance, max_variance); } else { float stddev = sqrtf(pixel_variance); if (L - 3 * stddev < ref) { /* The pixel is an outlier, so negate the depth value to mark it as one. * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM * weights. */ depth[idx] = -depth[idx]; float fac = ref / L; color *= fac; variance *= sqr(fac); } } } /* Apply log(1+x) transform to compress highlights and avoid halos in the denoised results. * Variance is transformed accordingly - the derivative of the transform is 1/(1+x), so we * scale by the square of that (since we have variance instead of standard deviation). */ color = color_highlight_compress(color, &variance); SET_COLOR(image_out, color); SET_COLOR(variance_out, variance); } #undef GET_COLOR #undef SET_COLOR /* Combine A/B buffers. * Calculates the combined mean and the buffer variance. */ ccl_device void kernel_filter_combine_halves(int x, int y, ccl_global float *mean, ccl_global float *variance, ccl_global float *a, ccl_global float *b, int4 rect, int r) { int buffer_w = align_up(rect.z - rect.x, 4); int idx = (y - rect.y) * buffer_w + (x - rect.x); if (mean) mean[idx] = 0.5f * (a[idx] + b[idx]); if (variance) { if (r == 0) variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]); else { variance[idx] = 0.0f; float values[25]; int numValues = 0; for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) { for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) { int pidx = (py - rect.y) * buffer_w + (px - rect.x); values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]); } } /* Insertion-sort the variances (fast enough for 25 elements). */ for (int i = 1; i < numValues; i++) { float v = values[i]; int j; for (j = i - 1; j >= 0 && values[j] > v; j--) values[j + 1] = values[j]; values[j + 1] = v; } variance[idx] = values[(7 * numValues) / 8]; } } } CCL_NAMESPACE_END