Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/filter/filter_prefilter.h')
-rw-r--r-- intern/cycles/kernel/filter/filter_prefilter.h 325
1 file changed, 173 insertions, 152 deletions
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index e24f4feb28d..8211311313d 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -27,7 +27,8 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_filter_divide_shadow(int sample,
CCL_FILTER_TILE_INFO,
- int x, int y,
+ int x,
+ int y,
ccl_global float *unfilteredA,
ccl_global float *unfilteredB,
ccl_global float *sampleVariance,
@@ -37,37 +38,39 @@ ccl_device void kernel_filter_divide_shadow(int sample,
int buffer_pass_stride,
int buffer_denoising_offset)
{
- int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
- int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
- int tile = ytile*3+xtile;
+ int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+ int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+ int tile = ytile * 3 + xtile;
- int offset = tile_info->offsets[tile];
- int stride = tile_info->strides[tile];
- const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile);
- center_buffer += (y*stride + x + offset)*buffer_pass_stride;
- center_buffer += buffer_denoising_offset + 14;
+ int offset = tile_info->offsets[tile];
+ int stride = tile_info->strides[tile];
+ const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer(
+ tile);
+ center_buffer += (y * stride + x + offset) * buffer_pass_stride;
+ center_buffer += buffer_denoising_offset + 14;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
- unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
- unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
+ unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
+ unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
- float varA = center_buffer[2];
- float varB = center_buffer[5];
- int odd_sample = (sample+1)/2;
- int even_sample = sample/2;
+ float varA = center_buffer[2];
+ float varB = center_buffer[5];
+ int odd_sample = (sample + 1) / 2;
+ int even_sample = sample / 2;
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
- varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample);
+ varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample);
- varA /= max(odd_sample - 1, 1);
- varB /= max(even_sample - 1, 1);
+ varA /= max(odd_sample - 1, 1);
+ varB /= max(even_sample - 1, 1);
- sampleVariance[idx] = 0.5f*(varA + varB) / sample;
- sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample);
- bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
+ sampleVariance[idx] = 0.5f * (varA + varB) / sample;
+ sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample);
+ bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) *
+ (unfilteredA[idx] - unfilteredB[idx]);
}
/* Load a regular feature from the render buffers into the denoise buffer.
@@ -80,55 +83,65 @@ ccl_device void kernel_filter_divide_shadow(int sample,
*/
ccl_device void kernel_filter_get_feature(int sample,
CCL_FILTER_TILE_INFO,
- int m_offset, int v_offset,
- int x, int y,
+ int m_offset,
+ int v_offset,
+ int x,
+ int y,
ccl_global float *mean,
ccl_global float *variance,
float scale,
- int4 rect, int buffer_pass_stride,
+ int4 rect,
+ int buffer_pass_stride,
int buffer_denoising_offset)
{
- int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
- int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
- int tile = ytile*3+xtile;
- ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
+ int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+ int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+ int tile = ytile * 3 + xtile;
+ ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) +
+ (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) *
+ buffer_pass_stride +
+ buffer_denoising_offset;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- float val = scale * center_buffer[m_offset];
- mean[idx] = val;
+ float val = scale * center_buffer[m_offset];
+ mean[idx] = val;
- if(v_offset >= 0) {
- if(sample > 1) {
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1)));
- }
- else {
- /* Can't compute variance with single sample, just set it very high. */
- variance[idx] = 1e10f;
- }
- }
+ if (v_offset >= 0) {
+ if (sample > 1) {
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ variance[idx] = max(
+ 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1)));
+ }
+ else {
+ /* Can't compute variance with single sample, just set it very high. */
+ variance[idx] = 1e10f;
+ }
+ }
}
ccl_device void kernel_filter_write_feature(int sample,
- int x, int y,
+ int x,
+ int y,
int4 buffer_params,
ccl_global float *from,
ccl_global float *buffer,
int out_offset,
int4 rect)
{
- ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
+ ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
+ buffer_params.z;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- combined_buffer[out_offset] = from[idx];
+ combined_buffer[out_offset] = from[idx];
}
-ccl_device void kernel_filter_detect_outliers(int x, int y,
+ccl_device void kernel_filter_detect_outliers(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -136,123 +149,131 @@ ccl_device void kernel_filter_detect_outliers(int x, int y,
int4 rect,
int pass_stride)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
+ int buffer_w = align_up(rect.z - rect.x, 4);
- int n = 0;
- float values[25];
- float pixel_variance, max_variance = 0.0f;
- for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
- for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
- int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
- float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- float L = average(color);
+ int n = 0;
+ float values[25];
+ float pixel_variance, max_variance = 0.0f;
+ for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
+ for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
+ int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
+ float3 color = make_float3(
+ image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ float L = average(color);
- /* Find the position of L. */
- int i;
- for(i = 0; i < n; i++) {
- if(values[i] > L) break;
- }
- /* Make space for L by shifting all following values to the right. */
- for(int j = n; j > i; j--) {
- values[j] = values[j-1];
- }
- /* Insert L. */
- values[i] = L;
- n++;
+ /* Find the position of L. */
+ int i;
+ for (i = 0; i < n; i++) {
+ if (values[i] > L)
+ break;
+ }
+ /* Make space for L by shifting all following values to the right. */
+ for (int j = n; j > i; j--) {
+ values[j] = values[j - 1];
+ }
+ /* Insert L. */
+ values[i] = L;
+ n++;
- float3 pixel_var = make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]);
- float var = average(pixel_var);
- if((x1 == x) && (y1 == y)) {
- pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f)? -1.0f : var;
- }
- else {
- max_variance = max(max_variance, var);
- }
- }
- }
+ float3 pixel_var = make_float3(
+ variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]);
+ float var = average(pixel_var);
+ if ((x1 == x) && (y1 == y)) {
+ pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
+ var;
+ }
+ else {
+ max_variance = max(max_variance, var);
+ }
+ }
+ }
- max_variance += 1e-4f;
+ max_variance += 1e-4f;
- int idx = (y-rect.y)*buffer_w + (x-rect.x);
- float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- float L = average(color);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
+ float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ float L = average(color);
- float ref = 2.0f*values[(int)(n*0.75f)];
+ float ref = 2.0f * values[(int)(n * 0.75f)];
- /* Slightly offset values to avoid false positives in (almost) black areas. */
- max_variance += 1e-5f;
- ref -= 1e-5f;
+ /* Slightly offset values to avoid false positives in (almost) black areas. */
+ max_variance += 1e-5f;
+ ref -= 1e-5f;
- if(L > ref) {
- /* The pixel appears to be an outlier.
- * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
- * should actually be at the reference value:
- * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
- * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
- */
+ if (L > ref) {
+ /* The pixel appears to be an outlier.
+ * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
+ * should actually be at the reference value:
+ * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
+ * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+ */
- if(pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
- depth[idx] = -depth[idx];
- color *= ref/L;
- variance[idx] = variance[idx + pass_stride] = variance[idx + 2*pass_stride] = max_variance;
- }
- else {
- float stddev = sqrtf(pixel_variance);
- if(L - 3*stddev < ref) {
- /* The pixel is an outlier, so negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
- depth[idx] = -depth[idx];
- float fac = ref/L;
- color *= fac;
- variance[idx ] *= fac*fac;
- variance[idx + pass_stride] *= fac*fac;
- variance[idx+2*pass_stride] *= fac*fac;
- }
- }
- }
- out[idx ] = color.x;
- out[idx + pass_stride] = color.y;
- out[idx+2*pass_stride] = color.z;
+ if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
+ depth[idx] = -depth[idx];
+ color *= ref / L;
+ variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance;
+ }
+ else {
+ float stddev = sqrtf(pixel_variance);
+ if (L - 3 * stddev < ref) {
+ /* The pixel is an outlier, so negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ float fac = ref / L;
+ color *= fac;
+ variance[idx] *= fac * fac;
+ variance[idx + pass_stride] *= fac * fac;
+ variance[idx + 2 * pass_stride] *= fac * fac;
+ }
+ }
+ }
+ out[idx] = color.x;
+ out[idx + pass_stride] = color.y;
+ out[idx + 2 * pass_stride] = color.z;
}
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x, int y,
+ccl_device void kernel_filter_combine_halves(int x,
+ int y,
ccl_global float *mean,
ccl_global float *variance,
ccl_global float *a,
ccl_global float *b,
- int4 rect, int r)
+ int4 rect,
+ int r)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]);
- if(variance) {
- if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]);
- else {
- variance[idx] = 0.0f;
- float values[25];
- int numValues = 0;
- for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
- for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
- int pidx = (py-rect.y)*buffer_w + (px-rect.x);
- values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
- }
- }
- /* Insertion-sort the variances (fast enough for 25 elements). */
- for(int i = 1; i < numValues; i++) {
- float v = values[i];
- int j;
- for(j = i-1; j >= 0 && values[j] > v; j--)
- values[j+1] = values[j];
- values[j+1] = v;
- }
- variance[idx] = values[(7*numValues)/8];
- }
- }
+ if (mean)
+ mean[idx] = 0.5f * (a[idx] + b[idx]);
+ if (variance) {
+ if (r == 0)
+ variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]);
+ else {
+ variance[idx] = 0.0f;
+ float values[25];
+ int numValues = 0;
+ for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) {
+ for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) {
+ int pidx = (py - rect.y) * buffer_w + (px - rect.x);
+ values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]);
+ }
+ }
+ /* Insertion-sort the variances (fast enough for 25 elements). */
+ for (int i = 1; i < numValues; i++) {
+ float v = values[i];
+ int j;
+ for (j = i - 1; j >= 0 && values[j] > v; j--)
+ values[j + 1] = values[j];
+ values[j + 1] = v;
+ }
+ variance[idx] = values[(7 * numValues) / 8];
+ }
+ }
}
CCL_NAMESPACE_END