diff options
Diffstat (limited to 'source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl')
-rw-r--r-- | source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl | 367 |
1 files changed, 367 insertions, 0 deletions

/**
 * Temporal Stabilization of the Depth of field input.
 * Corresponds to the TAA pass in the paper.
 * We actually duplicate the TAA logic but with a few changes:
 * - We run this pass at half resolution.
 * - We store CoC instead of Opacity in the alpha channel of the history.
 *
 * This is an adaptation of the code found in eevee_film_lib.glsl
 *
 * Inputs:
 * - Output of setup pass (halfres).
 * Outputs:
 * - Stabilized Color and CoC (halfres).
 **/

#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)

/* One input or history sample: a color and its associated Circle of Confusion.
 * Samples read from the LDS cache carry their color in YCoCg space. */
struct DofSample {
  vec4 color;
  float coc;
};

/* -------------------------------------------------------------------- */
/** \name LDS Cache
 * \{ */

/* Work-group tile plus a 1 pixel border on each side. */
const uint cache_size = gl_WorkGroupSize.x + 2u;
shared vec4 color_cache[cache_size][cache_size];
shared float coc_cache[cache_size][cache_size];
/* Need 2 pixel border for depth. */
const uint cache_depth_size = gl_WorkGroupSize.x + 4u;
shared float depth_cache[cache_depth_size][cache_depth_size];

/* Fill the shared color, CoC and depth caches for the current work-group.
 * Ends with a `barrier()`, so it must be called by every invocation of the group
 * before any of the `dof_fetch_*` functions are used. */
void dof_cache_init()
{
  /**
   * Load enough values into LDS to perform the filter.
   *
   * ┌──────────────────────────────┐
   * │                              │  < Border texels that need to be loaded.
   * │    x  x  x  x  x  x  x  x    │ ─┐
   * │    x  x  x  x  x  x  x  x    │  │
   * │    x  x  x  x  x  x  x  x    │  │
   * │    x  x  x  x  x  x  x  x    │  │ Thread Group Size 8x8.
   * │ L  L  L  L  L  x  x  x  x    │  │
   * │ L  L  L  L  L  x  x  x  x    │  │
   * │ L  L  L  L  L  x  x  x  x    │  │
   * │ L  L  L  L  L  x  x  x  x    │ ─┘
   * │ L  L  L  L  L                │  < Border texels that need to be loaded.
   * └──────────────────────────────┘
   *  └───────────┘
   *   Load using 5x5 threads.
   */

  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
  ivec2 local_texel = ivec2(gl_LocalInvocationID.xy);

  /* Loop invariants: only the first (cache size / 2)^2 threads take part in each load,
   * every one of them filling one texel per quadrant of the cache. */
  bool do_load_color = all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)));
  bool do_load_depth = all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)));
  /* Last valid texel of each source, used to clamp the border loads. */
  ivec2 color_texel_max = textureSize(color_tx, 0) - 1;
  ivec2 depth_texel_max = textureSize(depth_tx, 0) - 1;

  for (int y = 0; y < 2; y++) {
    for (int x = 0; x < 2; x++) {
      /* 1 Pixel border. */
      if (do_load_color) {
        ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
        ivec2 cache_texel = local_texel + offset;
        ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), color_texel_max);

        vec4 color = texelFetch(color_tx, load_texel, 0);
        color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color);
        coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x;
      }
      /* 2 Pixels border. */
      if (do_load_depth) {
        ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u);
        ivec2 cache_texel = local_texel + offset;
        /* Depth is fullres. Load every 2 pixels. */
        ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), depth_texel_max);

        depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x;
      }
    }
  }
  /* Kept outside of any divergent branch so that every invocation reaches it. */
  barrier();
}

/* Fetch the cached color and CoC at \a offset (in half-res texels) from the current invocation.
 * \a offset must stay within the 1 pixel cache border, i.e. in the [-1..1] range.
 * NOTE: Sample color space is already in YCoCg space. */
DofSample dof_fetch_input_sample(ivec2 offset)
{
  ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy);
  return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]);
}

/* Fetch the cached depth at \a offset (in half-res texels) from the current invocation.
 * \a offset must stay within the 2 pixel cache border, i.e. in the [-2..2] range. */
float dof_fetch_half_depth(ivec2 offset)
{
  ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy);
  return depth_cache[coord.y][coord.x];
}

/** \} */

/* Weight that decreases as \a luma increases. Used to reduce flickering. */
float dof_luma_weight(float luma)
{
  /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
  /* To preserve more details in dark areas, we use a bigger bias. */
  const float exposure_scale = 1.0; /* TODO. */
  return 1.0 / (4.0 + luma * exposure_scale);
}

/* Weight in [0..1] of a neighbor sample given its CoC and the CoC of the center sample.
 * Samples whose CoC is lower than or equal to \a reference_coc get full weight. */
float dof_bilateral_weight(float reference_coc, float sample_coc)
{
  /* NOTE: The difference between the cocs should be inside a abs() function,
   * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19).
   * Effectively bleed background into foreground.
   * Compared to dof_bilateral_coc_weights() this saturates at 2x the reference CoC. */
  return saturate(1.0 - (sample_coc - reference_coc) / max(1.0, abs(reference_coc)));
}

/* Return the incoming sample filtered with its 4 direct neighbors.
 * Neighbors are weighted by the filter weights stored in `dof_buf`, by luma and by CoC
 * similarity with the center sample. The result is normalized by the sum of the weights. */
DofSample dof_spatial_filtering()
{
  /* Plus (+) shape offsets. */
  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
  DofSample center = dof_fetch_input_sample(ivec2(0));
  DofSample accum = DofSample(vec4(0.0), 0.0);
  float accum_weight = 0.0;
  for (int i = 0; i < 4; i++) {
    DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
    float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) *
                   dof_bilateral_weight(center.coc, samp.coc);

    accum.color += samp.color * weight;
    accum.coc += samp.coc * weight;
    accum_weight += weight;
  }
  /* Accumulate center sample last as it does not need bilateral_weights. */
  float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x);
  accum.color += center.color * weight;
  accum.coc += center.coc * weight;
  accum_weight += weight;

  float rcp_weight = 1.0 / accum_weight;
  accum.color *= rcp_weight;
  accum.coc *= rcp_weight;
  return accum;
}

/* Per component lower and upper bounds of a sample neighborhood. */
struct DofNeighborhoodMinMax {
  DofSample min;
  DofSample max;
};

/* Return history clipping bounding box in YCoCg color space.
 * The CoC bounds are computed the same way as the color bounds. */
DofNeighborhoodMinMax dof_neighbor_boundbox()
{
  /* Plus (+) shape offsets. */
  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
  /**
   * Simple bounding box calculation in YCoCg as described in:
   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
   */
  DofSample min_c = dof_fetch_input_sample(ivec2(0));
  DofSample max_c = min_c;
  for (int i = 0; i < 4; i++) {
    DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
    min_c.color = min(min_c.color, samp.color);
    max_c.color = max(max_c.color, samp.color);
    min_c.coc = min(min_c.coc, samp.coc);
    max_c.coc = max(max_c.coc, samp.coc);
  }
  /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
   * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
  DofSample min_c_3x3 = min_c;
  DofSample max_c_3x3 = max_c;
  const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
  for (int i = 0; i < 4; i++) {
    DofSample samp = dof_fetch_input_sample(corners[i]);
    min_c_3x3.color = min(min_c_3x3.color, samp.color);
    max_c_3x3.color = max(max_c_3x3.color, samp.color);
    min_c_3x3.coc = min(min_c_3x3.coc, samp.coc);
    max_c_3x3.coc = max(max_c_3x3.coc, samp.coc);
  }
  min_c.color = (min_c.color + min_c_3x3.color) * 0.5;
  max_c.color = (max_c.color + max_c_3x3.color) * 0.5;
  min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5;
  max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5;

  return DofNeighborhoodMinMax(min_c, max_c);
}

/* Returns motion in pixel space to retrieve the pixel history.
 * \a texel_sample is the half-res texel being processed. */
vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
{
  /**
   * Dilate velocity by using the nearest pixel in a cross pattern.
   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
   */
  const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
  float min_depth = dof_fetch_half_depth(ivec2(0));
  ivec2 nearest_texel = ivec2(0);
  for (int i = 0; i < 4; i++) {
    float depth = dof_fetch_half_depth(corners[i]);
    /* Keep the sample with the smallest depth value. */
    if (min_depth > depth) {
      min_depth = depth;
      nearest_texel = corners[i];
    }
  }
  /* Convert to full resolution buffer pixel. */
  ivec2 velocity_texel = (texel_sample + nearest_texel) * 2;
  velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1);
  vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth);
  /* Transform to **half** pixel space. */
  return vector.xy * vec2(textureSize(color_tx, 0));
}

/* Load color using a special filter to avoid losing detail.
 * \a input_texel is sample position with subpixel accuracy.
 * The returned sample has the history alpha channel (CoC) moved to `coc`. */
DofSample dof_sample_history(vec2 input_texel)
{
#if 1 /* Bilinear. */
  /* Explicit conversion of the integer texture size: do not rely on implicit
   * int to float vector conversion, which stricter GLSL dialects reject. */
  vec2 uv = (input_texel + 0.5) / vec2(textureSize(in_history_tx, 0));
  vec4 color = textureLod(in_history_tx, uv, 0.0);

#else /* Catmull Rom interpolation. 5 Bilinear Taps. */
  /* NOTE(review): This branch is compiled out. It references `film_get_catmull_rom_weights`
   * and `film_buf`, which are not declared in this file (presumably from eevee_film_lib.glsl,
   * which is not required above). Confirm availability and that `film_buf.extent_inv` matches
   * the history texture size before enabling. */
  vec2 center_texel;
  vec2 inter_texel = modf(input_texel, center_texel);
  vec2 weights[4];
  film_get_catmull_rom_weights(inter_texel, weights);

  /**
   * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
   * corner taps.
   * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
   * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
   */
  center_texel += 0.5;

  /* Slide 92. */
  vec2 weight_12 = weights[1] + weights[2];
  vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
  vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
  vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;

  vec4 color;
  vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
  float weight_center = weight_12.x * weight_12.y;

  color = textureLod(in_history_tx, uv_12, 0.0) * weight_center;
  color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
  color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
  color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
  color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
  /* Re-normalize for the removed corners. */
  color /= (weight_center + sum(weight_cross));
#endif
  /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */
  return DofSample(color.xyzz, color.w);
}

/* Modulate the history color to avoid ghosting artifact.
 * Clamps \a history color and CoC to \a bbox. \a src is only used by the disabled clipping
 * variant. */
DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src)
{
#if 0
  /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
  vec3 clip_dir = src.color.rgb - history.color.rgb;

  float t = line_aabb_clipping_dist(
      history.color.rgb, clip_dir, bbox.min.color.rgb, bbox.max.color.rgb);
  history.color.rgb += clip_dir * saturate(t);
#else
  /* More responsive. */
  history.color = clamp(history.color, bbox.min.color, bbox.max.color);
#endif
  /* Clamp CoC to reduce convergence time. Otherwise the result is laggy. */
  history.coc = clamp(history.coc, bbox.min.coc, bbox.max.coc);

  return history;
}

/* Return the amount of incoming data (\a src) to blend over the history (\a dst).
 * 0.0 means history only, 1.0 means incoming data only.
 * \a velocity is the motion length in half-res pixels, \a texel the reprojected history
 * position and \a bbox the bounds of the incoming neighborhood. */
float dof_history_blend_factor(
    float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst)
{
  float luma_min = bbox.min.color.x;
  float luma_max = bbox.max.color.x;
  float luma_history = dst.color.x;

  /* 5% of incoming color by default. */
  float blend = 0.05;
  /* Blend less history if the pixel has substantial velocity. */
  /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */
  blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0));
  /**
   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
   * Bias towards history if the history luma is near clamping. Reduces flicker.
   */
  float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
  /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */
  distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
  /* Linearly blend when history gets below 25% of the bbox size. */
  blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
  /* Progressively discard history until history CoC is twice as big as the filtered CoC.
   * Note we use absolute diff here because we are not comparing neighbors and thus do not risk to
   * dilate thin features like hair (slide 19). */
  float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc)));
  blend = mix(blend, 1.0, coc_diff_ratio);
  /* Discard out of view history. */
  if (any(lessThan(texel, vec2(0))) ||
      any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) {
    blend = 1.0;
  }
  /* Discard history if invalid. */
  if (!use_history) {
    blend = 1.0;
  }
  return blend;
}

/* Entry point: stabilize one half-res texel and write the new history, color and CoC. */
void main()
{
  dof_cache_init();

  ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy);

  /**
   * Naming convention is taken from the film implementation.
   * SRC is incoming new data.
   * DST is history data.
   */
  DofSample src = dof_spatial_filtering();

  /* Reproject by finding where this pixel was in the previous frame. */
  vec2 motion = dof_pixel_history_motion_vector(src_texel);
  vec2 history_texel = vec2(src_texel) + motion;

  float velocity = length(motion);

  DofSample dst = dof_sample_history(history_texel);

  /* Get local color bounding box of source neighborhood. */
  DofNeighborhoodMinMax bbox = dof_neighbor_boundbox();

  /* The blend factor is computed from the history before it gets clamped to the bbox. */
  float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst);

  dst = dof_amend_history(bbox, dst, src);

  /* Luma weighted blend to reduce flickering. */
  float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend);
  float weight_src = dof_luma_weight(src.color.x) * (blend);

  DofSample result;
  /* Weighted blend. */
  result.color = vec4(dst.color.rgb, dst.coc) * weight_dst +
                 vec4(src.color.rgb, src.coc) * weight_src;
  result.color /= weight_src + weight_dst;

  /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */
  imageStore(out_history_img, src_texel, result.color);

  /* Un-swizzle. */
  result.coc = result.color.a;
  /* Clamp opacity since we don't store it in history. */
  result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a);

  result.color = colorspace_scene_linear_from_YCoCg(result.color);

  imageStore(out_color_img, src_texel, result.color);
  imageStore(out_coc_img, src_texel, vec4(result.coc));
}