EEVEE-Next: Depth Of Field: Improve Temporal stability

This implements a full TAA pass on the depth of field input. A history buffer is kept for each view needing depth of field. This uses a swap with a `TextureFromPool` in order to avoid always keeping 2 textures allocated. Since this uses luma weighting without any input, the firefly parameter is now obsolete and has been removed. There are some tiny differences with the Film TAA, so the implementation is mostly copy-pasted. Also, this implementation uses an LDS cache to speed up the TAA computations.
author: Clément Foucault <foucault.clem@gmail.com> 2022-08-04 13:33:43 +0300
committer: Clément Foucault <foucault.clem@gmail.com> 2022-08-05 15:45:09 +0300
commit: 49d85dc8b5d8056b226a33dfe01b7af0e4067ee1 (patch)
tree: bf9c05eb19ef2b2ceed0c95b89cd2d98253e98aa /source/blender/draw/engines
parent: 8659e62d1e6371c115f2b5fdf7f82b70db73d720 (diff)
12 files changed, 506 insertions, 110 deletions
diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh
index 8240af14203..2067d1c708c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -57,9 +57,10 @@
 #define DOF_TILES_DILATE_GROUP_SIZE 8
 #define DOF_BOKEH_LUT_SIZE 32
 #define DOF_MAX_SLIGHT_FOCUS_RADIUS 5
-#define DOF_REDUCE_GROUP_SIZE 8
+#define DOF_MIP_COUNT 4
+#define DOF_REDUCE_GROUP_SIZE (1 << (DOF_MIP_COUNT - 1))
 #define DOF_DEFAULT_GROUP_SIZE 32
+#define DOF_STABILIZE_GROUP_SIZE 16
 #define DOF_FILTER_GROUP_SIZE 8
 #define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE
 #define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)
-#define DOF_MIP_MAX 4
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
index 69f06da1782..de99a83b993 100644
--- a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
@@ -62,7 +62,6 @@ void DepthOfField::init()
   update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size);
   update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold);
   update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max);
-  update += assign_if_different(data_.denoise_factor, sce_eevee.bokeh_denoise_fac);
   update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades));
   if (update > 0) {
     inst_.sampling.reset();
@@ -162,18 +161,15 @@ void DepthOfField::sync()
   /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */
   int2 max_render_res = inst_.film.render_extent_get();
   int2 half_res = math::divide_ceil(max_render_res, int2(2));
-  int2 reduce_size = math::ceil_to_multiple(half_res, int2(1 < (DOF_MIP_MAX - 1)));
+  int2 reduce_size = math::ceil_to_multiple(half_res, int2(DOF_REDUCE_GROUP_SIZE));
 
   data_.gather_uv_fac = 1.0f / float2(reduce_size);
 
   /* Now that we know the maximum render resolution of every view, using depth of field, allocate
    * the reduced buffers. Color needs to be signed format here. See note in shader for
    * explanation. Do not use texture pool because of needs mipmaps. */
-  reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_MAX);
-  reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_MAX);
-  GPU_texture_wrap_mode(reduced_color_tx_, false, false);
-  GPU_texture_wrap_mode(reduced_coc_tx_, false, false);
-
+  reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT);
+  reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT);
   reduced_color_tx_.ensure_mip_views();
   reduced_coc_tx_.ensure_mip_views();
 
@@ -276,16 +272,28 @@ void DepthOfField::setup_pass_sync()
 
 void DepthOfField::stabilize_pass_sync()
 {
+  RenderBuffers &render_buffers = inst_.render_buffers;
+  VelocityModule &velocity = inst_.velocity;
+
   stabilize_ps_ = DRW_pass_create("Dof.stabilize_ps_", DRW_STATE_NO_DRAW);
   GPUShader *sh = inst_.shaders.static_shader_get(DOF_STABILIZE);
   DRWShadingGroup *grp = DRW_shgroup_create(sh, stabilize_ps_);
+  DRW_shgroup_uniform_block_ref(grp, "camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
+  DRW_shgroup_uniform_block_ref(grp, "camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
+  /* This is only for temporal stability. The next step is not needed. */
+  DRW_shgroup_uniform_block_ref(grp, "camera_next", &(*velocity.camera_steps[STEP_PREVIOUS]));
   DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
   DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &setup_color_tx_, no_filter);
+  DRW_shgroup_uniform_texture_ref_ex(grp, "velocity_tx", &render_buffers.vector_tx, no_filter);
+  DRW_shgroup_uniform_texture_ref_ex(grp, "in_history_tx", &stabilize_input_, with_filter);
+  DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
+  DRW_shgroup_uniform_bool(grp, "use_history", &stabilize_valid_history_, 1);
   DRW_shgroup_uniform_block(grp, "dof_buf", data_);
   DRW_shgroup_uniform_image(grp, "out_coc_img", reduced_coc_tx_.mip_view(0));
   DRW_shgroup_uniform_image(grp, "out_color_img", reduced_color_tx_.mip_view(0));
+  DRW_shgroup_uniform_image_ref(grp, "out_history_img", &stabilize_output_tx_);
   DRW_shgroup_call_compute_ref(grp, dispatch_stabilize_size_);
-  DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
+  DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
 }
 
 void DepthOfField::downsample_pass_sync()
@@ -319,8 +327,6 @@ void DepthOfField::reduce_pass_sync()
   DRW_shgroup_uniform_image(grp, "out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
   DRW_shgroup_uniform_image(grp, "out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
   DRW_shgroup_uniform_image(grp, "out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
-  /* Sync writes to inout_color_lod0_img from stabilize_ps_. */
-  DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
   DRW_shgroup_call_compute_ref(grp, dispatch_reduce_size_);
   /* NOTE: Command buffer barrier is done automatically by the GPU backend. */
   DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
@@ -481,7 +487,29 @@ void DepthOfField::resolve_pass_sync()
 /** \name Post-FX Rendering.
  * \{ */
 
-void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
+/* Similar to Film::update_sample_table() but with constant filter radius and constant sample
+ * count. */
+void DepthOfField::update_sample_table()
+{
+  float2 subpixel_offset = inst_.film.pixel_jitter_get();
+  /* Since the film jitter is in full-screen res, divide by 2 to get the jitter in half res. */
+  subpixel_offset *= 0.5;
+
+  /* Same offsets as in dof_spatial_filtering(). */
+  const std::array<int2, 4> plus_offsets = {int2(-1, 0), int2(0, -1), int2(1, 0), int2(0, 1)};
+
+  const float radius = 1.5f;
+  int i = 0;
+  for (int2 offset : plus_offsets) {
+    float2 pixel_ofs = float2(offset) - subpixel_offset;
+    data_.filter_samples_weight[i++] = film_filter_weight(radius, math::length_squared(pixel_ofs));
+  }
+  data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset));
+}
+
+void DepthOfField::render(GPUTexture **input_tx,
+                          GPUTexture **output_tx,
+                          DepthOfFieldBuffer &dof_buffer)
 {
   if (fx_radius_ == 0.0f) {
     return;
@@ -521,6 +549,8 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
     /* TODO(fclem): Make this dependent of the quality of the gather pass. */
     data_.scatter_coc_threshold = 4.0f;
 
+    update_sample_table();
+
     data_.push_update();
   }
 
@@ -529,7 +559,7 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
   int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE));
 
   dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
-  dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
+  dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_STABILIZE_GROUP_SIZE)), 1);
   dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)),
                                    1);
   dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1);
@@ -550,24 +580,41 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
 
   {
     DRW_stats_group_start("Setup");
+    {
+      bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
+      bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
+      bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
 
-    bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
-    bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
-    bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
-
-    DRW_draw_pass(bokeh_lut_ps_);
+      DRW_draw_pass(bokeh_lut_ps_);
+    }
+    {
+      setup_color_tx_.acquire(half_res, GPU_RGBA16F);
+      setup_coc_tx_.acquire(half_res, GPU_RG16F);
 
-    setup_color_tx_.acquire(half_res, GPU_RGBA16F);
-    setup_coc_tx_.acquire(half_res, GPU_RG16F);
+      DRW_draw_pass(setup_ps_);
+    }
+    {
+      stabilize_output_tx_.acquire(half_res, GPU_RGBA16F);
+      stabilize_valid_history_ = !dof_buffer.stabilize_history_tx_.ensure_2d(GPU_RGBA16F,
+                                                                             half_res);
 
-    DRW_draw_pass(setup_ps_);
+      if (stabilize_valid_history_ == false) {
+        /* Avoid uninitialized memory that can contain NaNs. */
+        dof_buffer.stabilize_history_tx_.clear(float4(0.0f));
+      }
 
-    /* Outputs to reduced_*_tx_ mip 0. */
-    DRW_draw_pass(stabilize_ps_);
+      stabilize_input_ = dof_buffer.stabilize_history_tx_;
+      /* Outputs to reduced_*_tx_ mip 0. */
+      DRW_draw_pass(stabilize_ps_);
 
-    /* Used by stabilize pass. */
-    setup_color_tx_.release();
+      /* WATCH(fclem): Swap Texture an TextureFromPool internal GPUTexture in order to reuse
+       * the one that we just consumed. */
+      TextureFromPool::swap(stabilize_output_tx_, dof_buffer.stabilize_history_tx_);
 
+      /* Used by stabilize pass. */
+      stabilize_output_tx_.release();
+      setup_color_tx_.release();
+    }
     {
       DRW_stats_group_start("Tile Prepare");
 
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
index e1c9d3117e3..a11924c3806 100644
--- a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
@@ -29,6 +29,17 @@ class Instance;
 /** \name Depth of field
  * \{ */
 
+struct DepthOfFieldBuffer {
+  /**
+   * Per view history texture for stabilize pass.
+   * Swapped with stabilize_output_tx_ in order to reuse the previous history during DoF
+   * processing.
+   * Note this should be private as its inner working only concerns the Depth Of Field
+   * implementation. The view itself should not touch it.
+   */
+  Texture stabilize_history_tx_ = {"dof_taa"};
+};
+
 class DepthOfField {
  private:
   class Instance &inst_;
@@ -58,6 +69,9 @@ class DepthOfField {
   Texture reduced_color_tx_ = {"dof_reduced_color"};
 
   /** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */
+  TextureFromPool stabilize_output_tx_ = {"dof_taa"};
+  GPUTexture *stabilize_input_ = nullptr;
+  bool1 stabilize_valid_history_ = false;
   int3 dispatch_stabilize_size_ = int3(-1);
   DRWPass *stabilize_ps_ = nullptr;
 
@@ -152,7 +166,7 @@ class DepthOfField {
    * Will swap input and output texture if rendering happens. The actual output of this function
    * is in input_tx.
    */
-  void render(GPUTexture **input_tx, GPUTexture **output_tx);
+  void render(GPUTexture **input_tx, GPUTexture **output_tx, DepthOfFieldBuffer &dof_buffer);
 
   bool postfx_enabled() const
   {
@@ -172,6 +186,8 @@ class DepthOfField {
   void scatter_pass_sync();
   void hole_fill_pass_sync();
   void resolve_pass_sync();
+
+  void update_sample_table();
 };
 
 /** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index 07957cd2c8c..fe36cb1a17c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -372,8 +372,6 @@ struct DepthOfFieldData {
   float scatter_color_threshold;
   float scatter_neighbor_max_color;
   int scatter_sprite_per_row;
-  /** Firefly removing factor. */
-  float denoise_factor;
   /** Number of side the bokeh shape has. */
   float bokeh_blades;
   /** Rotation of the bokeh shape. */
@@ -384,6 +382,9 @@ struct DepthOfFieldData {
   float coc_abs_max;
   /** Copy of camera type. */
   eCameraType camera_type;
+  /** Weights of spatial filtering in stabilize pass. Not array to avoid alignment restriction. */
+  float4 filter_samples_weight;
+  float filter_center_weight;
   /** Max number of sprite in the scatter pass for each ground. */
   int scatter_max_rect;
 
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc
index 68c855b9bc5..c195f68380c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_view.cc
@@ -151,7 +151,7 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
   GPUTexture *output_tx = postfx_tx_;
 
   /* Swapping is done internally. Actual output is set to the next input. */
-  inst_.depth_of_field.render(&input_tx, &output_tx);
+  inst_.depth_of_field.render(&input_tx, &output_tx, dof_buffer_);
   inst_.motion_blur.render(&input_tx, &output_tx);
 
   return input_tx;
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh
index ee169bf418e..65f27aba795 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_view.hh
@@ -44,6 +44,7 @@ class ShadingView {
   /** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */
   // RaytraceBuffer rt_buffer_opaque_;
   // RaytraceBuffer rt_buffer_refract_;
+  DepthOfFieldBuffer dof_buffer_;
 
   Framebuffer prepass_fb_;
   Framebuffer combined_fb_;
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
new file mode 100644
index 00000000000..d5fdaae6fc1
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
@@ -0,0 +1,37 @@
+
+/* -------------------------------------------------------------------- */
+/** \name YCoCg
+ * \{ */
+
+vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color)
+{
+  const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1),     /* Y */
+                                            vec3(2, 0, -2),    /* Co */
+                                            vec3(-1, 2, -1))); /* Cg */
+  return colorspace_tx * rgb_color;
+}
+
+vec4 colorspace_YCoCg_from_scene_linear(vec4 rgba_color)
+{
+  return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
+}
+
+vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color)
+{
+  float Y = ycocg_color.x;
+  float Co = ycocg_color.y;
+  float Cg = ycocg_color.z;
+
+  vec3 rgb_color;
+  rgb_color.r = Y + Co - Cg;
+  rgb_color.g = Y + Cg;
+  rgb_color.b = Y - Co - Cg;
+  return rgb_color * 0.25;
+}
+
+vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color)
+{
+  return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a);
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
index 88ecaab6a00..bf7c9413da3 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
@@ -15,8 +15,9 @@ struct FilterSample {
 /** \name Pixel cache.
  * \{ */
 
-shared vec4 color_cache[10][10];
-shared float weight_cache[10][10];
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float weight_cache[cache_size][cache_size];
 
 void cache_init()
 {
@@ -40,11 +41,12 @@ void cache_init()
    */
 
   ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1;
-  for (int y = 0; y < 2; y++) {
-    for (int x = 0; x < 2; x++) {
-      if (all(lessThan(gl_LocalInvocationID.xy, uvec2(5)))) {
-        ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + ivec2(x, y) * 5;
-        ivec2 load_texel = clamp(texel + ivec2(x, y) * 5, ivec2(0), textureSize(color_tx, 0) - 1);
+  if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+    for (int y = 0; y < 2; y++) {
+      for (int x = 0; x < 2; x++) {
+        ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+        ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+        ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1);
 
         color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0);
         weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r;
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
index 88a577a1c3c..622b545357e 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
@@ -73,9 +73,10 @@ float fast_luma(vec3 color)
   return (2.0 * color.g) + color.r + color.b;
 }
 
-shared vec4 color_cache[8][8];
-shared float coc_cache[8][8];
-shared float do_scatter[8][8];
+const uint cache_size = gl_WorkGroupSize.x;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+shared float do_scatter[cache_size][cache_size];
 
 void main()
 {
@@ -200,9 +201,9 @@ void main()
   imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);
 
   /* Recursive downsample. */
-  for (uint i = 1u; i < DOF_MIP_MAX; i++) {
+  for (uint i = 1u; i < DOF_MIP_COUNT; i++) {
     barrier();
-    if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_MAX - 1u - i))))) {
+    if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_COUNT - 1u - i))))) {
       uvec2 texel_local = gl_LocalInvocationID.xy << i;
 
       /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
index ac371f76395..254cacc45b7 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
@@ -2,8 +2,11 @@
 /**
  * Temporal Stabilization of the Depth of field input.
  * Corresponds to the TAA pass in the paper.
+ * We actually duplicate the TAA logic but with a few changes:
+ * - We run this pass at half resolution.
+ * - We store CoC instead of Opacity in the alpha channel of the history.
  *
- * TODO: This pass needs a cleanup / improvement using much better TAA.
+ * This is and adaption of the code found in eevee_film_lib.glsl
  *
  * Inputs:
  * - Output of setup pass (halfres).
@@ -11,54 +14,362 @@
  * - Stabilized Color and CoC (halfres).
  **/
 
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
 #pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
 
-float fast_luma(vec3 color)
+struct DofSample {
+  vec4 color;
+  float coc;
+};
+
+/* -------------------------------------------------------------------- */
+/** \name LDS Cache
+ * \{ */
+
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+/* Need 2 pixel border for depth. */
+const uint cache_depth_size = gl_WorkGroupSize.x + 4;
+shared float depth_cache[cache_depth_size][cache_depth_size];
+
+void dof_cache_init()
+{
+  /**
+   * Load enough values into LDS to perform the filter.
+   *
+   * ┌──────────────────────────────┐
+   * │                              │  < Border texels that needs to be loaded.
+   * │    x  x  x  x  x  x  x  x    │  ─┐
+   * │    x  x  x  x  x  x  x  x    │   │
+   * │    x  x  x  x  x  x  x  x    │   │
+   * │    x  x  x  x  x  x  x  x    │   │ Thread Group Size 8x8.
+   * │ L  L  L  L  L  x  x  x  x    │   │
+   * │ L  L  L  L  L  x  x  x  x    │   │
+   * │ L  L  L  L  L  x  x  x  x    │   │
+   * │ L  L  L  L  L  x  x  x  x    │  ─┘
+   * │ L  L  L  L  L                │  < Border texels that needs to be loaded.
+   * └──────────────────────────────┘
+   *   └───────────┘
+   *    Load using 5x5 threads.
+   */
+
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+  for (int y = 0; y < 2; y++) {
+    for (int x = 0; x < 2; x++) {
+      /* 1 Pixel border. */
+      if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+        ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+        ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+        ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1);
+
+        vec4 color = texelFetch(color_tx, load_texel, 0);
+        color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color);
+        coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x;
+      }
+      /* 2 Pixels border. */
+      if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) {
+        ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u);
+        ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+        /* Depth is fullres. Load every 2 pixels. */
+        ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1);
+
+        depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x;
+      }
+    }
+  }
+  barrier();
+}
+
+/* Note: Sample color space is already in YCoCg space. */
+DofSample dof_fetch_input_sample(ivec2 offset)
+{
+  ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy);
+  return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]);
+}
+
+float dof_fetch_half_depth(ivec2 offset)
+{
+  ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy);
+  return depth_cache[coord.y][coord.x];
+}
+
+/** \} */
+
+float dof_luma_weight(float luma)
 {
-  return (2.0 * color.g) + color.r + color.b;
+  /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
+  /* To preserve more details in dark areas, we use a bigger bias. */
+  const float exposure_scale = 1.0; /* TODO. */
+  return 1.0 / (4.0 + luma * exposure_scale);
 }
 
-/* Lightweight version of neighborhood clamping found in TAA. */
-vec3 dof_neighborhood_clamping(vec3 color)
+float dof_bilateral_weight(float reference_coc, float sample_coc)
 {
-  vec2 texel_size = 1.0 / vec2(textureSize(color_tx, 0));
-  vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) * texel_size;
-  vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy;
+  /* NOTE: The difference between the cocs should be inside a abs() function,
+   * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19).
+   * Compared to dof_bilateral_coc_weights() this saturates as 2x the reference CoC. */
+  return saturate(1.0 - (reference_coc - sample_coc) / max(1.0, abs(reference_coc)));
+}
 
-  /* Luma clamping. 3x3 square neighborhood. */
-  float c00 = fast_luma(textureLod(color_tx, uv + ofs.xz, 0.0).rgb);
-  float c01 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(1.0, 0.0), 0.0).rgb);
-  float c02 = fast_luma(textureLod(color_tx, uv + ofs.xw, 0.0).rgb);
+DofSample dof_spatial_filtering()
+{
+  /* Plus (+) shape offsets. */
+  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+  DofSample center = dof_fetch_input_sample(ivec2(0));
+  DofSample accum = DofSample(vec4(0.0), 0.0);
+  float accum_weight = 0.0;
+  for (int i = 0; i < 4; i++) {
+    DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+    float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) *
+                   dof_bilateral_weight(center.coc, samp.coc);
 
-  float c10 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(0.0, 1.0), 0.0).rgb);
-  float c11 = fast_luma(color);
-  float c12 = fast_luma(textureLod(color_tx, uv + ofs.xw * vec2(0.0, 1.0), 0.0).rgb);
+    accum.color += samp.color * weight;
+    accum.coc += samp.coc * weight;
+    accum_weight += weight;
+  }
+  /* Accumulate center sample last as it does not need bilateral_weights. */
+  float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x);
+  accum.color += center.color * weight;
+  accum.coc += center.coc * weight;
+  accum_weight += weight;
 
-  float c20 = fast_luma(textureLod(color_tx, uv + ofs.yz, 0.0).rgb);
-  float c21 = fast_luma(textureLod(color_tx, uv + ofs.yz * vec2(1.0, 0.0), 0.0).rgb);
-  float c22 = fast_luma(textureLod(color_tx, uv + ofs.yw, 0.0).rgb);
+  float rcp_weight = 1.0 / accum_weight;
+  accum.color *= rcp_weight;
+  accum.coc *= rcp_weight;
+  return accum;
+}
 
-  float avg_luma = avg8(c00, c01, c02, c10, c12, c20, c21, c22);
-  float max_luma = max8(c00, c01, c02, c10, c12, c20, c21, c22);
+struct DofNeighborhoodMinMax {
+  DofSample min;
+  DofSample max;
+};
 
-  float upper_bound = mix(max_luma, avg_luma, dof_buf.denoise_factor);
-  upper_bound = mix(c11, upper_bound, dof_buf.denoise_factor);
+/* Return history clipping bounding box in YCoCg color space. */
+DofNeighborhoodMinMax dof_neighbor_boundbox()
+{
+  /* Plus (+) shape offsets. */
+  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+  /**
+   * Simple bounding box calculation in YCoCg as described in:
+   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
+   */
+  DofSample min_c = dof_fetch_input_sample(ivec2(0));
+  DofSample max_c = min_c;
+  for (int i = 0; i < 4; i++) {
+    DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+    min_c.color = min(min_c.color, samp.color);
+    max_c.color = max(max_c.color, samp.color);
+    min_c.coc = min(min_c.coc, samp.coc);
+    max_c.coc = max(max_c.coc, samp.coc);
+  }
+  /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
+   * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
+  DofSample min_c_3x3 = min_c;
+  DofSample max_c_3x3 = max_c;
+  const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+  for (int i = 0; i < 4; i++) {
+    DofSample samp = dof_fetch_input_sample(corners[i]);
+    min_c_3x3.color = min(min_c_3x3.color, samp.color);
+    max_c_3x3.color = max(max_c_3x3.color, samp.color);
+    min_c_3x3.coc = min(min_c_3x3.coc, samp.coc);
+    max_c_3x3.coc = max(max_c_3x3.coc, samp.coc);
+  }
+  min_c.color = (min_c.color + min_c_3x3.color) * 0.5;
+  max_c.color = (max_c.color + max_c_3x3.color) * 0.5;
+  min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5;
+  max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5;
 
-  float clamped_luma = min(upper_bound, c11);
+  return DofNeighborhoodMinMax(min_c, max_c);
+}
 
-  return color * clamped_luma * safe_rcp(c11);
+/* Returns motion in pixel space to retrieve the pixel history. */
+vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
+{
+  /**
+   * Dilate velocity by using the nearest pixel in a cross pattern.
+   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
+   */
+  const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+  float min_depth = dof_fetch_half_depth(ivec2(0));
+  ivec2 nearest_texel = ivec2(0);
+  for (int i = 0; i < 4; i++) {
+    float depth = dof_fetch_half_depth(corners[i]);
+    if (min_depth > depth) {
+      min_depth = depth;
+      nearest_texel = corners[i];
+    }
+  }
+  /* Convert to full resolution buffer pixel. */
+  ivec2 velocity_texel = (texel_sample + nearest_texel) * 2;
+  velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1);
+  vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth);
+  /* Transform to **half** pixel space. */
+  return vector.xy * vec2(textureSize(color_tx, 0));
+}
+
+/* Load color using a special filter to avoid losing detail.
+ * \a texel is sample position with subpixel accuracy. */
+DofSample dof_sample_history(vec2 input_texel)
+{
+#if 1 /* Bilinar. */
+  vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0);
+  vec4 color = textureLod(in_history_tx, uv, 0.0);
+
+#elif 0 /* Catmull Rom interpolation. 5 Bilinear Taps. */
+  vec2 center_texel;
+  vec2 inter_texel = modf(input_texel, center_texel);
+  vec2 weights[4];
+  film_get_catmull_rom_weights(inter_texel, weights);
+
+  /**
+   * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
+   * corner taps.
+   * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
+   * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
+   */
+  center_texel += 0.5;
+
+  /* Slide 92. */
+  vec2 weight_12 = weights[1] + weights[2];
+  vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
+  vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
+  vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;
+
+  vec4 color;
+  vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
+  float weight_center = weight_12.x * weight_12.y;
+
+  color = textureLod(in_history_tx, uv_12, 0.0) * weight_center;
+  color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
+  color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
+  color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
+  color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
+  /* Re-normalize for the removed corners. */
+  color /= (weight_center + sum(weight_cross));
+#endif
+  /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */
+  return DofSample(color.xyzz, color.w);
+}
+
+/* 1D equivalent of line_aabb_clipping_dist(). */
+float dof_aabb_clipping_dist_coc(float origin, float direction, float aabb_min, float aabb_max)
+{
+  if (abs(direction) < 1e-5) {
+    return 0.0;
+  }
+  float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max;
+  return (nearest_plane - origin) / direction;
+}
+
+/* Modulate the history color and CoC towards the source sample to avoid ghosting artifacts. */
+DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src)
+{
+  /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
+  DofSample clip_dir;
+  clip_dir.color = src.color - history.color;
+  clip_dir.coc = src.coc - history.coc;
+
+  float t = line_aabb_clipping_dist(
+      history.color.rgb, clip_dir.color.rgb, bbox.min.color.rgb, bbox.max.color.rgb);
+  history.color.rgb += clip_dir.color.rgb * saturate(t);
+
+  /* Clip CoC on its own to avoid interference with other channels. */
+  float t_a = dof_aabb_clipping_dist_coc(history.coc, clip_dir.coc, bbox.min.coc, bbox.max.coc);
+  history.coc += clip_dir.coc * saturate(t_a);
+
+  return history;
+}
+
+float dof_history_blend_factor(
+    float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst)
+{
+  float luma_min = bbox.min.color.x;
+  float luma_max = bbox.max.color.x;
+  float luma_incoming = src.color.x;
+  float luma_history = dst.color.x;
+
+  /* 5% of incoming color by default. */
+  float blend = 0.05;
+  /* Blend less history if the pixel has substantial velocity. */
+  /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */
+  blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0));
+  /**
+   * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
+   * Bias towards history if incoming pixel is near clamping. Reduces flicker.
+   */
+  float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
+  /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */
+  distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
+  /* Linearly blend when history gets below 25% of the bbox size. */
+  blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
+  /* Progressively discard history until history CoC is twice as big as the filtered CoC.
+   * Note we use absolute diff here because we are not comparing neighbors and thus do not risk
+   * dilating thin features like hair (slide 19). */
+  float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc)));
+  blend = mix(blend, 1.0, coc_diff_ratio);
+  /* Discard out of view history. */
+  if (any(lessThan(texel, vec2(0))) ||
+      any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) {
+    blend = 1.0;
+  }
+  /* Discard history if invalid. */
+  if (use_history == false) {
+    blend = 1.0;
+  }
+  return blend;
 }
 
 void main()
 {
-  vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(textureSize(color_tx, 0).xy);
-  vec4 out_color = textureLod(color_tx, uv, 0.0);
-  float out_coc = textureLod(coc_tx, uv, 0.0).r;
+  dof_cache_init();
+
+  ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /**
+   * Naming convention is taken from the film implementation.
+   * SRC is incoming new data.
+   * DST is history data.
+   */
+  DofSample src = dof_spatial_filtering();
+
+  /* Reproject by finding where this pixel was in the previous frame. */
+  vec2 motion = dof_pixel_history_motion_vector(src_texel);
+  vec2 history_texel = vec2(src_texel) + motion;
+
+  float velocity = length(motion);
+
+  DofSample dst = dof_sample_history(history_texel);
+
+  /* Get local color bounding box of source neighborhood. */
+  DofNeighborhoodMinMax bbox = dof_neighbor_boundbox();
+
+  float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst);
+
+  dst = dof_amend_history(bbox, dst, src);
+
+  /* Luma weighted blend to reduce flickering. */
+  float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend);
+  float weight_src = dof_luma_weight(src.color.x) * (blend);
+
+  DofSample result;
+  /* Weighted blend. CoC is swizzled into alpha so that it is blended along with the color. */
+  result.color = vec4(dst.color.rgb, dst.coc) * weight_dst +
+                 vec4(src.color.rgb, src.coc) * weight_src;
+  result.color /= weight_src + weight_dst;
+
+  /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */
+  imageStore(out_history_img, src_texel, result.color);
+
+  /* Un-swizzle. */
+  result.coc = result.color.a;
+  /* Clamp opacity since we don't store it in history. */
+  result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a);
 
-  out_color.rgb = dof_neighborhood_clamping(out_color.rgb);
-  /* TODO(fclem): Stabilize CoC. */
+  result.color = colorspace_scene_linear_from_YCoCg(result.color);
 
-  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
-  imageStore(out_color_img, out_texel, out_color);
-  imageStore(out_coc_img, out_texel, vec4(out_coc));
+  imageStore(out_color_img, src_texel, result.color);
+  imageStore(out_coc_img, src_texel, vec4(result.coc));
 }
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
index 08027f2ef6c..bf6293d5561 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
@@ -7,6 +7,7 @@
 #pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
 #pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
 #pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
 
 /* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */
 float film_depth_convert_to_scene(float depth)
@@ -18,32 +19,6 @@ float film_depth_convert_to_scene(float depth)
   return abs(get_view_z_from_depth(depth));
 }
 
-vec3 film_YCoCg_from_scene_linear(vec3 rgb_color)
-{
-  const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1),     /* Y */
-                                            vec3(2, 0, -2),    /* Co */
-                                            vec3(-1, 2, -1))); /* Cg */
-  return colorspace_tx * rgb_color;
-}
-
-vec4 film_YCoCg_from_scene_linear(vec4 rgba_color)
-{
-  return vec4(film_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
-}
-
-vec3 film_scene_linear_from_YCoCg(vec3 ycocg_color)
-{
-  float Y = ycocg_color.x;
-  float Co = ycocg_color.y;
-  float Cg = ycocg_color.z;
-
-  vec3 rgb_color;
-  rgb_color.r = Y + Co - Cg;
-  rgb_color.g = Y + Cg;
-  rgb_color.b = Y - Co - Cg;
-  return rgb_color * 0.25;
-}
-
 /* Load a texture sample in a specific format. Combined pass needs to use this. */
 vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
 {
@@ -51,7 +26,7 @@ vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
   /* Convert transmittance to opacity. */
   color.a = saturate(1.0 - color.a);
   /* Transform to YCoCg for accumulation. */
-  color.rgb = film_YCoCg_from_scene_linear(color.rgb);
+  color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb);
   return color;
 }
 
@@ -220,7 +195,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
   float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
   ivec2 nearest_texel = texel_sample;
   for (int i = 0; i < 4; i++) {
-    ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy);
+    ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1);
     float depth = texelFetch(depth_tx, texel, 0).x;
     if (min_depth > depth) {
       min_depth = depth;
@@ -455,7 +430,7 @@ void film_store_combined(
     // dst.weight = film_weight_load(texel_combined);
 
     color_dst = film_sample_catmull_rom(in_combined_tx, history_texel);
-    color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+    color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
 
     /* Get local color bounding box of source neighborhood. */
     vec4 min_color, max_color;
@@ -473,7 +448,7 @@ void film_store_combined(
   else {
     /* Everything is static. Use render accumulation. */
     color_dst = texelFetch(in_combined_tx, dst.texel, 0);
-    color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+    color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
 
     /* Luma weighted blend to avoid flickering. */
     weight_dst = film_luma_weight(color_dst.x) * dst.weight;
@@ -483,7 +458,7 @@ void film_store_combined(
   color = color_dst * weight_dst + color_src * weight_src;
   color /= weight_src + weight_dst;
 
-  color.rgb = film_scene_linear_from_YCoCg(color.rgb);
+  color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb);
 
   /* Fix alpha not accumulating to 1 because of float imprecision. */
   if (color.a > 0.995) {
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
index c95c0877c88..1dd9178ae84 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
@@ -29,14 +29,18 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
 
 GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
     .do_static_compilation(true)
-    .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
-    .additional_info("eevee_shared", "draw_view")
-    .uniform_buf(1, "DepthOfFieldData", "dof_buf")
-    .sampler(0, ImageType::DEPTH_2D, "coc_tx")
+    .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE)
+    .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+    .uniform_buf(4, "DepthOfFieldData", "dof_buf")
+    .sampler(0, ImageType::FLOAT_2D, "coc_tx")
     .sampler(1, ImageType::FLOAT_2D, "color_tx")
-    // .sampler(2, ImageType::FLOAT_2D, "velocity_tx") /* TODO: TAA with reprojection. */
+    .sampler(2, ImageType::FLOAT_2D, "velocity_tx")
+    .sampler(3, ImageType::FLOAT_2D, "in_history_tx")
+    .sampler(4, ImageType::DEPTH_2D, "depth_tx")
+    .push_constant(Type::BOOL, "use_history") /* If false, the shader forces blend to 1.0. */
     .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
     .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
+    .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img")
     .compute_source("eevee_depth_of_field_stabilize_comp.glsl");
 
 GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample)
author	Clément Foucault <foucault.clem@gmail.com>	2022-08-04 13:33:43 +0300
committer	Clément Foucault <foucault.clem@gmail.com>	2022-08-05 15:45:09 +0300
commit	49d85dc8b5d8056b226a33dfe01b7af0e4067ee1 (patch)
tree	bf9c05eb19ef2b2ceed0c95b89cd2d98253e98aa /source/blender/draw/engines
parent	8659e62d1e6371c115f2b5fdf7f82b70db73d720 (diff)