git.blender.org/blender.git
author     Clément Foucault <foucault.clem@gmail.com>  2022-08-04 13:33:43 +0300
committer  Clément Foucault <foucault.clem@gmail.com>  2022-08-05 15:45:09 +0300
commit     49d85dc8b5d8056b226a33dfe01b7af0e4067ee1 (patch)
tree       bf9c05eb19ef2b2ceed0c95b89cd2d98253e98aa /source/blender/draw/engines/eevee_next/shaders
parent     8659e62d1e6371c115f2b5fdf7f82b70db73d720 (diff)
EEVEE-Next: Depth Of Field: Improve Temporal stability
This implements a full TAA pass on the depth of field input. A history buffer is kept for each view needing depth of field. The history is swapped with a `TextureFromPool` so that two textures are not kept allocated at all times.

Since this uses luma weighting without any input, the firefly parameter is now obsolete and has been removed.

There are a few small differences with the Film TAA, so the implementation is mostly copy-pasted. This implementation also uses an LDS cache to speed up the TAA computations.
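For orientation, the core of such a TAA resolve is a luma-weighted blend between the reprojected history and the incoming frame. Below is a minimal GLSL sketch of that blend, illustrative only and not part of this patch; `luma_weight`, `taa_resolve`, `history`, `incoming` and `blend` are hypothetical names:

    /* Illustrative luma weight: down-weights bright pixels, which is what makes a
     * separate firefly parameter unnecessary. */
    float luma_weight(float luma)
    {
      return 1.0 / (4.0 + luma);
    }

    /* `blend` is the fraction of the incoming sample kept this frame (e.g. ~0.05).
     * Colors are assumed to be in YCoCg, so .x is the luma channel. */
    vec4 taa_resolve(vec4 history, vec4 incoming, float blend)
    {
      float weight_hist = luma_weight(history.x) * (1.0 - blend);
      float weight_src = luma_weight(incoming.x) * blend;
      return (history * weight_hist + incoming * weight_src) / (weight_hist + weight_src);
    }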
Diffstat (limited to 'source/blender/draw/engines/eevee_next/shaders')
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl                 37
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl     16
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl     11
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl  375
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl                       37
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh        14
6 files changed, 410 insertions, 80 deletions
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
new file mode 100644
index 00000000000..d5fdaae6fc1
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
@@ -0,0 +1,37 @@
+
+/* -------------------------------------------------------------------- */
+/** \name YCoCg
+ * \{ */
+
+vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color)
+{
+ const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */
+ vec3(2, 0, -2), /* Co */
+ vec3(-1, 2, -1))); /* Cg */
+ return colorspace_tx * rgb_color;
+}
+
+vec4 colorspace_YCoCg_from_scene_linear(vec4 rgba_color)
+{
+ return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
+}
+
+vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color)
+{
+ float Y = ycocg_color.x;
+ float Co = ycocg_color.y;
+ float Cg = ycocg_color.z;
+
+ vec3 rgb_color;
+ rgb_color.r = Y + Co - Cg;
+ rgb_color.g = Y + Cg;
+ rgb_color.b = Y - Co - Cg;
+ return rgb_color * 0.25;
+}
+
+vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color)
+{
+ return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a);
+}
+
+/** \} */
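As a quick sanity check (not part of the patch): substituting the forward transform above, Y = r + 2*g + b, Co = 2*r - 2*b, Cg = -r + 2*g - b, into the inverse gives Y + Co - Cg = 4*r, Y + Cg = 4*g and Y - Co - Cg = 4*b, which is why colorspace_scene_linear_from_YCoCg() ends with the `* 0.25`.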
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
index 88ecaab6a00..bf7c9413da3 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
@@ -15,8 +15,9 @@ struct FilterSample {
/** \name Pixel cache.
* \{ */
-shared vec4 color_cache[10][10];
-shared float weight_cache[10][10];
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float weight_cache[cache_size][cache_size];
void cache_init()
{
@@ -40,11 +41,12 @@ void cache_init()
*/
ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1;
- for (int y = 0; y < 2; y++) {
- for (int x = 0; x < 2; x++) {
- if (all(lessThan(gl_LocalInvocationID.xy, uvec2(5)))) {
- ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + ivec2(x, y) * 5;
- ivec2 load_texel = clamp(texel + ivec2(x, y) * 5, ivec2(0), textureSize(color_tx, 0) - 1);
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1);
color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0);
weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r;
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
index 88a577a1c3c..622b545357e 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
@@ -73,9 +73,10 @@ float fast_luma(vec3 color)
return (2.0 * color.g) + color.r + color.b;
}
-shared vec4 color_cache[8][8];
-shared float coc_cache[8][8];
-shared float do_scatter[8][8];
+const uint cache_size = gl_WorkGroupSize.x;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+shared float do_scatter[cache_size][cache_size];
void main()
{
@@ -200,9 +201,9 @@ void main()
imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);
/* Recursive downsample. */
- for (uint i = 1u; i < DOF_MIP_MAX; i++) {
+ for (uint i = 1u; i < DOF_MIP_COUNT; i++) {
barrier();
- if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_MAX - 1u - i))))) {
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_COUNT - 1u - i))))) {
uvec2 texel_local = gl_LocalInvocationID.xy << i;
/* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
index ac371f76395..254cacc45b7 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
@@ -2,8 +2,11 @@
/**
* Temporal Stabilization of the Depth of field input.
* Corresponds to the TAA pass in the paper.
+ * We actually duplicate the TAA logic but with a few changes:
+ * - We run this pass at half resolution.
+ * - We store CoC instead of Opacity in the alpha channel of the history.
*
- * TODO: This pass needs a cleanup / improvement using much better TAA.
+ * This is an adaptation of the code found in eevee_film_lib.glsl.
*
* Inputs:
* - Output of setup pass (halfres).
@@ -11,54 +14,362 @@
* - Stabilized Color and CoC (halfres).
**/
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
-float fast_luma(vec3 color)
+struct DofSample {
+ vec4 color;
+ float coc;
+};
+
+/* -------------------------------------------------------------------- */
+/** \name LDS Cache
+ * \{ */
+
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+/* Need 2 pixel border for depth. */
+const uint cache_depth_size = gl_WorkGroupSize.x + 4;
+shared float depth_cache[cache_depth_size][cache_depth_size];
+
+void dof_cache_init()
+{
+ /**
+ * Load enough values into LDS to perform the filter.
+ *
+ * ┌──────────────────────────────┐
+ * │ │ < Border texels that need to be loaded.
+ * │ x x x x x x x x │ ─┐
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │ Thread Group Size 8x8.
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ ─┘
+ * │ L L L L L │ < Border texels that need to be loaded.
+ * └──────────────────────────────┘
+ * └───────────┘
+ * Load using 5x5 threads.
+ */
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ /* 1 Pixel border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1);
+
+ vec4 color = texelFetch(color_tx, load_texel, 0);
+ color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color);
+ coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x;
+ }
+ /* 2 Pixels border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ /* Depth is fullres. Load every 2 pixels. */
+ ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1);
+
+ depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x;
+ }
+ }
+ }
+ barrier();
+}
+
+/* Note: Sample color space is already in YCoCg space. */
+DofSample dof_fetch_input_sample(ivec2 offset)
+{
+ ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy);
+ return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]);
+}
+
+float dof_fetch_half_depth(ivec2 offset)
+{
+ ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy);
+ return depth_cache[coord.y][coord.x];
+}
+
+/** \} */
+
+float dof_luma_weight(float luma)
{
- return (2.0 * color.g) + color.r + color.b;
+ /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
+ /* To preserve more details in dark areas, we use a bigger bias. */
+ const float exposure_scale = 1.0; /* TODO. */
+ return 1.0 / (4.0 + luma * exposure_scale);
}
-/* Lightweight version of neighborhood clamping found in TAA. */
-vec3 dof_neighborhood_clamping(vec3 color)
+float dof_bilateral_weight(float reference_coc, float sample_coc)
{
- vec2 texel_size = 1.0 / vec2(textureSize(color_tx, 0));
- vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) * texel_size;
- vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy;
+ /* NOTE: The difference between the CoCs should be inside an abs() function,
+ * but we follow the UE4 implementation to improve how dithered transparency looks (see slide 19).
+ * Compared to dof_bilateral_coc_weights() this saturates at 2x the reference CoC. */
+ return saturate(1.0 - (reference_coc - sample_coc) / max(1.0, abs(reference_coc)));
+}
- /* Luma clamping. 3x3 square neighborhood. */
- float c00 = fast_luma(textureLod(color_tx, uv + ofs.xz, 0.0).rgb);
- float c01 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(1.0, 0.0), 0.0).rgb);
- float c02 = fast_luma(textureLod(color_tx, uv + ofs.xw, 0.0).rgb);
+DofSample dof_spatial_filtering()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ DofSample center = dof_fetch_input_sample(ivec2(0));
+ DofSample accum = DofSample(vec4(0.0), 0.0);
+ float accum_weight = 0.0;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) *
+ dof_bilateral_weight(center.coc, samp.coc);
- float c10 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(0.0, 1.0), 0.0).rgb);
- float c11 = fast_luma(color);
- float c12 = fast_luma(textureLod(color_tx, uv + ofs.xw * vec2(0.0, 1.0), 0.0).rgb);
+ accum.color += samp.color * weight;
+ accum.coc += samp.coc * weight;
+ accum_weight += weight;
+ }
+ /* Accumulate center sample last as it does not need bilateral_weights. */
+ float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x);
+ accum.color += center.color * weight;
+ accum.coc += center.coc * weight;
+ accum_weight += weight;
- float c20 = fast_luma(textureLod(color_tx, uv + ofs.yz, 0.0).rgb);
- float c21 = fast_luma(textureLod(color_tx, uv + ofs.yz * vec2(1.0, 0.0), 0.0).rgb);
- float c22 = fast_luma(textureLod(color_tx, uv + ofs.yw, 0.0).rgb);
+ float rcp_weight = 1.0 / accum_weight;
+ accum.color *= rcp_weight;
+ accum.coc *= rcp_weight;
+ return accum;
+}
- float avg_luma = avg8(c00, c01, c02, c10, c12, c20, c21, c22);
- float max_luma = max8(c00, c01, c02, c10, c12, c20, c21, c22);
+struct DofNeighborhoodMinMax {
+ DofSample min;
+ DofSample max;
+};
- float upper_bound = mix(max_luma, avg_luma, dof_buf.denoise_factor);
- upper_bound = mix(c11, upper_bound, dof_buf.denoise_factor);
+/* Return history clipping bounding box in YCoCg color space. */
+DofNeighborhoodMinMax dof_neighbor_boundbox()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ /**
+ * Simple bounding box calculation in YCoCg as described in:
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
+ */
+ DofSample min_c = dof_fetch_input_sample(ivec2(0));
+ DofSample max_c = min_c;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ min_c.color = min(min_c.color, samp.color);
+ max_c.color = max(max_c.color, samp.color);
+ min_c.coc = min(min_c.coc, samp.coc);
+ max_c.coc = max(max_c.coc, samp.coc);
+ }
+ /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
+ * Round the bbox shape by averaging 2 different min/max from 2 different neighborhoods. */
+ DofSample min_c_3x3 = min_c;
+ DofSample max_c_3x3 = max_c;
+ const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(corners[i]);
+ min_c_3x3.color = min(min_c_3x3.color, samp.color);
+ max_c_3x3.color = max(max_c_3x3.color, samp.color);
+ min_c_3x3.coc = min(min_c_3x3.coc, samp.coc);
+ max_c_3x3.coc = max(max_c_3x3.coc, samp.coc);
+ }
+ min_c.color = (min_c.color + min_c_3x3.color) * 0.5;
+ max_c.color = (max_c.color + max_c_3x3.color) * 0.5;
+ min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5;
+ max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5;
- float clamped_luma = min(upper_bound, c11);
+ return DofNeighborhoodMinMax(min_c, max_c);
+}
- return color * clamped_luma * safe_rcp(c11);
+/* Returns motion in pixel space to retrieve the pixel history. */
+vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
+{
+ /**
+ * Dilate velocity by using the nearest pixel in a cross pattern.
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
+ */
+ const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+ float min_depth = dof_fetch_half_depth(ivec2(0));
+ ivec2 nearest_texel = ivec2(0);
+ for (int i = 0; i < 4; i++) {
+ float depth = dof_fetch_half_depth(corners[i]);
+ if (min_depth > depth) {
+ min_depth = depth;
+ nearest_texel = corners[i];
+ }
+ }
+ /* Convert to full resolution buffer pixel. */
+ ivec2 velocity_texel = (texel_sample + nearest_texel) * 2;
+ velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1);
+ vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth);
+ /* Transform to **half** pixel space. */
+ return vector.xy * vec2(textureSize(color_tx, 0));
+}
+
+/* Load color using a special filter to avoid losing detail.
+ * \a input_texel is the sample position with subpixel accuracy. */
+DofSample dof_sample_history(vec2 input_texel)
+{
+#if 1 /* Bilinear. */
+ vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0);
+ vec4 color = textureLod(in_history_tx, uv, 0.0);
+
+#elif 0 /* Catmull-Rom interpolation. 5 bilinear taps. */
+ vec2 center_texel;
+ vec2 inter_texel = modf(input_texel, center_texel);
+ vec2 weights[4];
+ film_get_catmull_rom_weights(inter_texel, weights);
+
+ /**
+ * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
+ * corner taps.
+ * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
+ * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
+ */
+ center_texel += 0.5;
+
+ /* Slide 92. */
+ vec2 weight_12 = weights[1] + weights[2];
+ vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
+ vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
+ vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;
+
+ vec4 color;
+ vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
+ float weight_center = weight_12.x * weight_12.y;
+
+ color = textureLod(in_history_tx, uv_12, 0.0) * weight_center;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
+ color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
+ color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
+ /* Re-normalize for the removed corners. */
+ color /= (weight_center + sum(weight_cross));
+#endif
+ /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */
+ return DofSample(color.xyzz, color.w);
+}
+
+/* 1D equivalent of line_aabb_clipping_dist(). */
+float dof_aabb_clipping_dist_coc(float origin, float direction, float aabb_min, float aabb_max)
+{
+ if (abs(direction) < 1e-5) {
+ return 0.0;
+ }
+ float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max;
+ return (nearest_plane - origin) / direction;
+}
+
+/* Modulate the history color to avoid ghosting artifacts. */
+DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src)
+{
+ /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
+ DofSample clip_dir;
+ clip_dir.color = src.color - history.color;
+ clip_dir.coc = src.coc - history.coc;
+
+ float t = line_aabb_clipping_dist(
+ history.color.rgb, clip_dir.color.rgb, bbox.min.color.rgb, bbox.max.color.rgb);
+ history.color.rgb += clip_dir.color.rgb * saturate(t);
+
+ /* Clip CoC on its own to avoid interference with other channels. */
+ float t_a = dof_aabb_clipping_dist_coc(history.coc, clip_dir.coc, bbox.min.coc, bbox.max.coc);
+ history.coc += clip_dir.coc * saturate(t_a);
+
+ return history;
+}
+
+float dof_history_blend_factor(
+ float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst)
+{
+ float luma_min = bbox.min.color.x;
+ float luma_max = bbox.max.color.x;
+ float luma_incoming = src.color.x;
+ float luma_history = dst.color.x;
+
+ /* 5% of incoming color by default. */
+ float blend = 0.05;
+ /* Blend less history if the pixel has substantial velocity. */
+ /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */
+ blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0));
+ /**
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
+ * Bias towards history if the incoming pixel is near clamping. Reduces flicker.
+ */
+ float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
+ /* Divide by bbox size to get a factor. Multiply by 2 to compensate for the line above. */
+ distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
+ /* Linearly blend when history gets below 25% of the bbox size. */
+ blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
+ /* Progressively discard history until history CoC is twice as big as the filtered CoC.
+ * Note we use the absolute diff here because we are not comparing neighbors and thus do not
+ * risk dilating thin features like hair (slide 19). */
+ float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc)));
+ blend = mix(blend, 1.0, coc_diff_ratio);
+ /* Discard out of view history. */
+ if (any(lessThan(texel, vec2(0))) ||
+ any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) {
+ blend = 1.0;
+ }
+ /* Discard history if invalid. */
+ if (use_history == false) {
+ blend = 1.0;
+ }
+ return blend;
}
void main()
{
- vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(textureSize(color_tx, 0).xy);
- vec4 out_color = textureLod(color_tx, uv, 0.0);
- float out_coc = textureLod(coc_tx, uv, 0.0).r;
+ dof_cache_init();
+
+ ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy);
+
+ /**
+ * Naming convention is taken from the film implementation.
+ * SRC is incoming new data.
+ * DST is history data.
+ */
+ DofSample src = dof_spatial_filtering();
+
+ /* Reproject by finding where this pixel was in the previous frame. */
+ vec2 motion = dof_pixel_history_motion_vector(src_texel);
+ vec2 history_texel = vec2(src_texel) + motion;
+
+ float velocity = length(motion);
+
+ DofSample dst = dof_sample_history(history_texel);
+
+ /* Get local color bounding box of source neighborhood. */
+ DofNeighborhoodMinMax bbox = dof_neighbor_boundbox();
+
+ float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst);
+
+ dst = dof_amend_history(bbox, dst, src);
+
+ /* Luma weighted blend to reduce flickering. */
+ float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend);
+ float weight_src = dof_luma_weight(src.color.x) * (blend);
+
+ DofSample result;
+ /* Weighted blend. */
+ result.color = vec4(dst.color.rgb, dst.coc) * weight_dst +
+ vec4(src.color.rgb, src.coc) * weight_src;
+ result.color /= weight_src + weight_dst;
+
+ /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */
+ imageStore(out_history_img, src_texel, result.color);
+
+ /* Un-swizzle. */
+ result.coc = result.color.a;
+ /* Clamp opacity since we don't store it in history. */
+ result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a);
- out_color.rgb = dof_neighborhood_clamping(out_color.rgb);
- /* TODO(fclem): Stabilize CoC. */
+ result.color = colorspace_scene_linear_from_YCoCg(result.color);
- ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
- imageStore(out_color_img, out_texel, out_color);
- imageStore(out_coc_img, out_texel, vec4(out_coc));
+ imageStore(out_color_img, src_texel, result.color);
+ imageStore(out_coc_img, src_texel, vec4(result.coc));
}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
index 08027f2ef6c..bf6293d5561 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
@@ -7,6 +7,7 @@
#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
/* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */
float film_depth_convert_to_scene(float depth)
@@ -18,32 +19,6 @@ float film_depth_convert_to_scene(float depth)
return abs(get_view_z_from_depth(depth));
}
-vec3 film_YCoCg_from_scene_linear(vec3 rgb_color)
-{
- const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */
- vec3(2, 0, -2), /* Co */
- vec3(-1, 2, -1))); /* Cg */
- return colorspace_tx * rgb_color;
-}
-
-vec4 film_YCoCg_from_scene_linear(vec4 rgba_color)
-{
- return vec4(film_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
-}
-
-vec3 film_scene_linear_from_YCoCg(vec3 ycocg_color)
-{
- float Y = ycocg_color.x;
- float Co = ycocg_color.y;
- float Cg = ycocg_color.z;
-
- vec3 rgb_color;
- rgb_color.r = Y + Co - Cg;
- rgb_color.g = Y + Cg;
- rgb_color.b = Y - Co - Cg;
- return rgb_color * 0.25;
-}
-
/* Load a texture sample in a specific format. Combined pass needs to use this. */
vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
{
@@ -51,7 +26,7 @@ vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
/* Convert transmittance to opacity. */
color.a = saturate(1.0 - color.a);
/* Transform to YCoCg for accumulation. */
- color.rgb = film_YCoCg_from_scene_linear(color.rgb);
+ color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb);
return color;
}
@@ -220,7 +195,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
ivec2 nearest_texel = texel_sample;
for (int i = 0; i < 4; i++) {
- ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy);
+ ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1);
float depth = texelFetch(depth_tx, texel, 0).x;
if (min_depth > depth) {
min_depth = depth;
@@ -455,7 +430,7 @@ void film_store_combined(
// dst.weight = film_weight_load(texel_combined);
color_dst = film_sample_catmull_rom(in_combined_tx, history_texel);
- color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
/* Get local color bounding box of source neighborhood. */
vec4 min_color, max_color;
@@ -473,7 +448,7 @@ void film_store_combined(
else {
/* Everything is static. Use render accumulation. */
color_dst = texelFetch(in_combined_tx, dst.texel, 0);
- color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
/* Luma weighted blend to avoid flickering. */
weight_dst = film_luma_weight(color_dst.x) * dst.weight;
@@ -483,7 +458,7 @@ void film_store_combined(
color = color_dst * weight_dst + color_src * weight_src;
color /= weight_src + weight_dst;
- color.rgb = film_scene_linear_from_YCoCg(color.rgb);
+ color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb);
/* Fix alpha not accumulating to 1 because of float imprecision. */
if (color.a > 0.995) {
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
index c95c0877c88..1dd9178ae84 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
@@ -29,14 +29,18 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
.do_static_compilation(true)
- .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
- .additional_info("eevee_shared", "draw_view")
- .uniform_buf(1, "DepthOfFieldData", "dof_buf")
- .sampler(0, ImageType::DEPTH_2D, "coc_tx")
+ .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+ .uniform_buf(4, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::FLOAT_2D, "coc_tx")
.sampler(1, ImageType::FLOAT_2D, "color_tx")
- // .sampler(2, ImageType::FLOAT_2D, "velocity_tx") /* TODO: TAA with reprojection. */
+ .sampler(2, ImageType::FLOAT_2D, "velocity_tx")
+ .sampler(3, ImageType::FLOAT_2D, "in_history_tx")
+ .sampler(4, ImageType::DEPTH_2D, "depth_tx")
+ .push_constant(Type::BOOL, "use_history")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img")
.compute_source("eevee_depth_of_field_stabilize_comp.glsl");
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample)