diff options
Diffstat (limited to 'intern/cycles/kernel/filter/filter_prefilter.h')
-rw-r--r-- | intern/cycles/kernel/filter/filter_prefilter.h | 325 |
1 files changed, 173 insertions, 152 deletions
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index e24f4feb28d..8211311313d 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -27,7 +27,8 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_filter_divide_shadow(int sample, CCL_FILTER_TILE_INFO, - int x, int y, + int x, + int y, ccl_global float *unfilteredA, ccl_global float *unfilteredB, ccl_global float *sampleVariance, @@ -37,37 +38,39 @@ ccl_device void kernel_filter_divide_shadow(int sample, int buffer_pass_stride, int buffer_denoising_offset) { - int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2); - int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2); - int tile = ytile*3+xtile; + int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); + int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); + int tile = ytile * 3 + xtile; - int offset = tile_info->offsets[tile]; - int stride = tile_info->strides[tile]; - const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile); - center_buffer += (y*stride + x + offset)*buffer_pass_stride; - center_buffer += buffer_denoising_offset + 14; + int offset = tile_info->offsets[tile]; + int stride = tile_info->strides[tile]; + const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer( + tile); + center_buffer += (y * stride + x + offset) * buffer_pass_stride; + center_buffer += buffer_denoising_offset + 14; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); - unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f); - unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); + unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f); + unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f); - float varA = center_buffer[2]; - float varB = center_buffer[5]; - int odd_sample = (sample+1)/2; - int even_sample = sample/2; + float varA = center_buffer[2]; + float varB = center_buffer[5]; + int odd_sample = (sample + 1) / 2; + int even_sample = sample / 2; - /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance - * update does not work efficiently with atomics in the kernel. */ - varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample); - varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample); + /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance + * update does not work efficiently with atomics in the kernel. */ + varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample); + varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample); - varA /= max(odd_sample - 1, 1); - varB /= max(even_sample - 1, 1); + varA /= max(odd_sample - 1, 1); + varB /= max(even_sample - 1, 1); - sampleVariance[idx] = 0.5f*(varA + varB) / sample; - sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample); - bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]); + sampleVariance[idx] = 0.5f * (varA + varB) / sample; + sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample); + bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * + (unfilteredA[idx] - unfilteredB[idx]); } /* Load a regular feature from the render buffers into the denoise buffer. @@ -80,55 +83,65 @@ ccl_device void kernel_filter_divide_shadow(int sample, */ ccl_device void kernel_filter_get_feature(int sample, CCL_FILTER_TILE_INFO, - int m_offset, int v_offset, - int x, int y, + int m_offset, + int v_offset, + int x, + int y, ccl_global float *mean, ccl_global float *variance, float scale, - int4 rect, int buffer_pass_stride, + int4 rect, + int buffer_pass_stride, int buffer_denoising_offset) { - int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2); - int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2); - int tile = ytile*3+xtile; - ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset; + int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); + int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); + int tile = ytile * 3 + xtile; + ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) + + (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) * + buffer_pass_stride + + buffer_denoising_offset; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - float val = scale * center_buffer[m_offset]; - mean[idx] = val; + float val = scale * center_buffer[m_offset]; + mean[idx] = val; - if(v_offset >= 0) { - if(sample > 1) { - /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance - * update does not work efficiently with atomics in the kernel. */ - variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1))); - } - else { - /* Can't compute variance with single sample, just set it very high. */ - variance[idx] = 1e10f; - } - } + if (v_offset >= 0) { + if (sample > 1) { + /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance + * update does not work efficiently with atomics in the kernel. */ + variance[idx] = max( + 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1))); + } + else { + /* Can't compute variance with single sample, just set it very high. */ + variance[idx] = 1e10f; + } + } } ccl_device void kernel_filter_write_feature(int sample, - int x, int y, + int x, + int y, int4 buffer_params, ccl_global float *from, ccl_global float *buffer, int out_offset, int4 rect) { - ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; + ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) * + buffer_params.z; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - combined_buffer[out_offset] = from[idx]; + combined_buffer[out_offset] = from[idx]; } -ccl_device void kernel_filter_detect_outliers(int x, int y, +ccl_device void kernel_filter_detect_outliers(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -136,123 +149,131 @@ ccl_device void kernel_filter_detect_outliers(int x, int y, int4 rect, int pass_stride) { - int buffer_w = align_up(rect.z - rect.x, 4); + int buffer_w = align_up(rect.z - rect.x, 4); - int n = 0; - float values[25]; - float pixel_variance, max_variance = 0.0f; - for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) { - for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) { - int idx = (y1-rect.y)*buffer_w + (x1-rect.x); - float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]); - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - float L = average(color); + int n = 0; + float values[25]; + float pixel_variance, max_variance = 0.0f; + for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) { + for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) { + int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x); + float3 color = make_float3( + image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]); + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + float L = average(color); - /* Find the position of L. */ - int i; - for(i = 0; i < n; i++) { - if(values[i] > L) break; - } - /* Make space for L by shifting all following values to the right. */ - for(int j = n; j > i; j--) { - values[j] = values[j-1]; - } - /* Insert L. */ - values[i] = L; - n++; + /* Find the position of L. */ + int i; + for (i = 0; i < n; i++) { + if (values[i] > L) + break; + } + /* Make space for L by shifting all following values to the right. */ + for (int j = n; j > i; j--) { + values[j] = values[j - 1]; + } + /* Insert L. */ + values[i] = L; + n++; - float3 pixel_var = make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]); - float var = average(pixel_var); - if((x1 == x) && (y1 == y)) { - pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f)? -1.0f : var; - } - else { - max_variance = max(max_variance, var); - } - } - } + float3 pixel_var = make_float3( + variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]); + float var = average(pixel_var); + if ((x1 == x) && (y1 == y)) { + pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f : + var; + } + else { + max_variance = max(max_variance, var); + } + } + } - max_variance += 1e-4f; + max_variance += 1e-4f; - int idx = (y-rect.y)*buffer_w + (x-rect.x); - float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]); - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - float L = average(color); + int idx = (y - rect.y) * buffer_w + (x - rect.x); + float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]); + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + float L = average(color); - float ref = 2.0f*values[(int)(n*0.75f)]; + float ref = 2.0f * values[(int)(n * 0.75f)]; - /* Slightly offset values to avoid false positives in (almost) black areas. */ - max_variance += 1e-5f; - ref -= 1e-5f; + /* Slightly offset values to avoid false positives in (almost) black areas. */ + max_variance += 1e-5f; + ref -= 1e-5f; - if(L > ref) { - /* The pixel appears to be an outlier. - * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel - * should actually be at the reference value: - * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. - * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. - */ + if (L > ref) { + /* The pixel appears to be an outlier. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel + * should actually be at the reference value: + * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. + * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + */ - if(pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { - depth[idx] = -depth[idx]; - color *= ref/L; - variance[idx] = variance[idx + pass_stride] = variance[idx + 2*pass_stride] = max_variance; - } - else { - float stddev = sqrtf(pixel_variance); - if(L - 3*stddev < ref) { - /* The pixel is an outlier, so negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ - depth[idx] = -depth[idx]; - float fac = ref/L; - color *= fac; - variance[idx ] *= fac*fac; - variance[idx + pass_stride] *= fac*fac; - variance[idx+2*pass_stride] *= fac*fac; - } - } - } - out[idx ] = color.x; - out[idx + pass_stride] = color.y; - out[idx+2*pass_stride] = color.z; + if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { + depth[idx] = -depth[idx]; + color *= ref / L; + variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance; + } + else { + float stddev = sqrtf(pixel_variance); + if (L - 3 * stddev < ref) { + /* The pixel is an outlier, so negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + float fac = ref / L; + color *= fac; + variance[idx] *= fac * fac; + variance[idx + pass_stride] *= fac * fac; + variance[idx + 2 * pass_stride] *= fac * fac; + } + } + } + out[idx] = color.x; + out[idx + pass_stride] = color.y; + out[idx + 2 * pass_stride] = color.z; } /* Combine A/B buffers. * Calculates the combined mean and the buffer variance. */ -ccl_device void kernel_filter_combine_halves(int x, int y, +ccl_device void kernel_filter_combine_halves(int x, + int y, ccl_global float *mean, ccl_global float *variance, ccl_global float *a, ccl_global float *b, - int4 rect, int r) + int4 rect, + int r) { - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]); - if(variance) { - if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]); - else { - variance[idx] = 0.0f; - float values[25]; - int numValues = 0; - for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) { - for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) { - int pidx = (py-rect.y)*buffer_w + (px-rect.x); - values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]); - } - } - /* Insertion-sort the variances (fast enough for 25 elements). */ - for(int i = 1; i < numValues; i++) { - float v = values[i]; - int j; - for(j = i-1; j >= 0 && values[j] > v; j--) - values[j+1] = values[j]; - values[j+1] = v; - } - variance[idx] = values[(7*numValues)/8]; - } - } + if (mean) + mean[idx] = 0.5f * (a[idx] + b[idx]); + if (variance) { + if (r == 0) + variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]); + else { + variance[idx] = 0.0f; + float values[25]; + int numValues = 0; + for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) { + for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) { + int pidx = (py - rect.y) * buffer_w + (px - rect.x); + values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]); + } + } + /* Insertion-sort the variances (fast enough for 25 elements). */ + for (int i = 1; i < numValues; i++) { + float v = values[i]; + int j; + for (j = i - 1; j >= 0 && values[j] > v; j--) + values[j + 1] = values[j]; + values[j + 1] = v; + } + variance[idx] = values[(7 * numValues) / 8]; + } + } } CCL_NAMESPACE_END |