git.blender.org/blender.git
Diffstat (limited to 'source/blender/draw')
-rw-r--r--  source/blender/draw/CMakeLists.txt | 1
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_defines.hh | 5
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc | 95
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh | 18
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_shader_shared.hh | 5
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_view.cc | 2
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_view.hh | 1
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl | 37
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl | 16
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl | 11
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl | 375
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl | 37
-rw-r--r--  source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh | 14
13 files changed, 507 insertions(+), 110 deletions(-)
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index e907c17e9d1..c34a6daa126 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -363,6 +363,7 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_attributes_lib.glsl
engines/eevee_next/shaders/eevee_camera_lib.glsl
+ engines/eevee_next/shaders/eevee_colorspace_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl
diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh
index 8240af14203..2067d1c708c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -57,9 +57,10 @@
#define DOF_TILES_DILATE_GROUP_SIZE 8
#define DOF_BOKEH_LUT_SIZE 32
#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5
-#define DOF_REDUCE_GROUP_SIZE 8
+#define DOF_MIP_COUNT 4
+#define DOF_REDUCE_GROUP_SIZE (1 << (DOF_MIP_COUNT - 1))
#define DOF_DEFAULT_GROUP_SIZE 32
+#define DOF_STABILIZE_GROUP_SIZE 16
#define DOF_FILTER_GROUP_SIZE 8
#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE
#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)
-#define DOF_MIP_MAX 4
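
The renamed DOF_MIP_COUNT and the derived DOF_REDUCE_GROUP_SIZE above encode one invariant: a reduce workgroup of 2^(DOF_MIP_COUNT - 1) threads per side shrinks its tile down to a single texel at the last mip, so one workgroup can emit the whole mip chain of its tile. A minimal CPU sketch (C++, values copied from the macros above) asserting that relationship:

  #include <cassert>

  /* Values mirror the macros above. */
  constexpr int DOF_MIP_COUNT = 4;
  constexpr int DOF_REDUCE_GROUP_SIZE = 1 << (DOF_MIP_COUNT - 1); /* = 8 */

  int main()
  {
    /* Halving the tile (DOF_MIP_COUNT - 1) times must land exactly on 1 texel,
     * otherwise the reduce pass could not produce every mip in a single dispatch. */
    assert((DOF_REDUCE_GROUP_SIZE >> (DOF_MIP_COUNT - 1)) == 1);
    return 0;
  }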
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
index 69f06da1782..de99a83b993 100644
--- a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
@@ -62,7 +62,6 @@ void DepthOfField::init()
update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size);
update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold);
update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max);
- update += assign_if_different(data_.denoise_factor, sce_eevee.bokeh_denoise_fac);
update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades));
if (update > 0) {
inst_.sampling.reset();
@@ -162,18 +161,15 @@ void DepthOfField::sync()
/* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */
int2 max_render_res = inst_.film.render_extent_get();
int2 half_res = math::divide_ceil(max_render_res, int2(2));
- int2 reduce_size = math::ceil_to_multiple(half_res, int2(1 < (DOF_MIP_MAX - 1)));
+ int2 reduce_size = math::ceil_to_multiple(half_res, int2(DOF_REDUCE_GROUP_SIZE));
data_.gather_uv_fac = 1.0f / float2(reduce_size);
/* Now that we know the maximum render resolution of every view, using depth of field, allocate
* the reduced buffers. Color needs to be signed format here. See note in shader for
* explanation. Do not use texture pool because of needs mipmaps. */
- reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_MAX);
- reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_MAX);
- GPU_texture_wrap_mode(reduced_color_tx_, false, false);
- GPU_texture_wrap_mode(reduced_coc_tx_, false, false);
-
+ reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT);
+ reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT);
reduced_color_tx_.ensure_mip_views();
reduced_coc_tx_.ensure_mip_views();
@@ -276,16 +272,28 @@ void DepthOfField::setup_pass_sync()
void DepthOfField::stabilize_pass_sync()
{
+ RenderBuffers &render_buffers = inst_.render_buffers;
+ VelocityModule &velocity = inst_.velocity;
+
stabilize_ps_ = DRW_pass_create("Dof.stabilize_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_STABILIZE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, stabilize_ps_);
+ DRW_shgroup_uniform_block_ref(grp, "camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
+ DRW_shgroup_uniform_block_ref(grp, "camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
+ /* This is only for temporal stability. The next step is not needed. */
+ DRW_shgroup_uniform_block_ref(grp, "camera_next", &(*velocity.camera_steps[STEP_PREVIOUS]));
DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &setup_color_tx_, no_filter);
+ DRW_shgroup_uniform_texture_ref_ex(grp, "velocity_tx", &render_buffers.vector_tx, no_filter);
+ DRW_shgroup_uniform_texture_ref_ex(grp, "in_history_tx", &stabilize_input_, with_filter);
+ DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
+ DRW_shgroup_uniform_bool(grp, "use_history", &stabilize_valid_history_, 1);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image(grp, "out_coc_img", reduced_coc_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_color_img", reduced_color_tx_.mip_view(0));
+ DRW_shgroup_uniform_image_ref(grp, "out_history_img", &stabilize_output_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_stabilize_size_);
- DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
void DepthOfField::downsample_pass_sync()
@@ -319,8 +327,6 @@ void DepthOfField::reduce_pass_sync()
DRW_shgroup_uniform_image(grp, "out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
- /* Sync writes to inout_color_lod0_img from stabilize_ps_. */
- DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
DRW_shgroup_call_compute_ref(grp, dispatch_reduce_size_);
/* NOTE: Command buffer barrier is done automatically by the GPU backend. */
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
@@ -481,7 +487,29 @@ void DepthOfField::resolve_pass_sync()
/** \name Post-FX Rendering.
* \{ */
-void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
+/* Similar to Film::update_sample_table() but with constant filter radius and constant sample
+ * count. */
+void DepthOfField::update_sample_table()
+{
+ float2 subpixel_offset = inst_.film.pixel_jitter_get();
+ /* Since the film jitter is in full-screen res, divide by 2 to get the jitter in half res. */
+ subpixel_offset *= 0.5;
+
+ /* Same offsets as in dof_spatial_filtering(). */
+ const std::array<int2, 4> plus_offsets = {int2(-1, 0), int2(0, -1), int2(1, 0), int2(0, 1)};
+
+ const float radius = 1.5f;
+ int i = 0;
+ for (int2 offset : plus_offsets) {
+ float2 pixel_ofs = float2(offset) - subpixel_offset;
+ data_.filter_samples_weight[i++] = film_filter_weight(radius, math::length_squared(pixel_ofs));
+ }
+ data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset));
+}
+
+void DepthOfField::render(GPUTexture **input_tx,
+ GPUTexture **output_tx,
+ DepthOfFieldBuffer &dof_buffer)
{
if (fx_radius_ == 0.0f) {
return;
@@ -521,6 +549,8 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
/* TODO(fclem): Make this dependent of the quality of the gather pass. */
data_.scatter_coc_threshold = 4.0f;
+ update_sample_table();
+
data_.push_update();
}
@@ -529,7 +559,7 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE));
dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
- dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
+ dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_STABILIZE_GROUP_SIZE)), 1);
dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)),
1);
dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1);
@@ -550,24 +580,41 @@ void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
{
DRW_stats_group_start("Setup");
+ {
+ bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
+ bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
+ bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
- bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
- bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
- bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
-
- DRW_draw_pass(bokeh_lut_ps_);
+ DRW_draw_pass(bokeh_lut_ps_);
+ }
+ {
+ setup_color_tx_.acquire(half_res, GPU_RGBA16F);
+ setup_coc_tx_.acquire(half_res, GPU_RG16F);
- setup_color_tx_.acquire(half_res, GPU_RGBA16F);
- setup_coc_tx_.acquire(half_res, GPU_RG16F);
+ DRW_draw_pass(setup_ps_);
+ }
+ {
+ stabilize_output_tx_.acquire(half_res, GPU_RGBA16F);
+ stabilize_valid_history_ = !dof_buffer.stabilize_history_tx_.ensure_2d(GPU_RGBA16F,
+ half_res);
- DRW_draw_pass(setup_ps_);
+ if (stabilize_valid_history_ == false) {
+ /* Avoid uninitialized memory that can contain NaNs. */
+ dof_buffer.stabilize_history_tx_.clear(float4(0.0f));
+ }
- /* Outputs to reduced_*_tx_ mip 0. */
- DRW_draw_pass(stabilize_ps_);
+ stabilize_input_ = dof_buffer.stabilize_history_tx_;
+ /* Outputs to reduced_*_tx_ mip 0. */
+ DRW_draw_pass(stabilize_ps_);
- /* Used by stabilize pass. */
- setup_color_tx_.release();
+ /* WATCH(fclem): Swap the Texture and TextureFromPool internal GPUTextures in order to reuse
+ * the one that we just consumed. */
+ TextureFromPool::swap(stabilize_output_tx_, dof_buffer.stabilize_history_tx_);
+ /* Used by stabilize pass. */
+ stabilize_output_tx_.release();
+ setup_color_tx_.release();
+ }
{
DRW_stats_group_start("Tile Prepare");
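
update_sample_table() above precomputes the stabilize pass's spatial-filter weights on the CPU so the shader only has to read dof_buf.filter_samples_weight and dof_buf.filter_center_weight. Below is a standalone C++ sketch of that precomputation; the Gaussian falloff used for filter_weight() is an assumption standing in for Blender's film_filter_weight(), and the jitter value is made up for illustration:

  #include <array>
  #include <cmath>
  #include <cstdio>

  struct float2 { float x, y; };

  /* Hypothetical stand-in for film_filter_weight(): Gaussian falloff over the
   * squared distance, cut off at the filter radius. Not Blender's actual code. */
  static float filter_weight(float radius, float squared_dist)
  {
    if (squared_dist > radius * radius) {
      return 0.0f;
    }
    return std::exp(-2.0f * squared_dist / (radius * radius));
  }

  int main()
  {
    /* Film jitter is expressed in full-resolution pixels; the DoF buffers are
     * half-resolution, hence the 0.5 factor (see update_sample_table()). */
    float2 subpixel_offset = {0.3f * 0.5f, -0.2f * 0.5f}; /* Example jitter. */

    /* Same plus (+) shape offsets as dof_spatial_filtering(). */
    const std::array<float2, 4> plus_offsets = {{{-1, 0}, {0, -1}, {1, 0}, {0, 1}}};
    const float radius = 1.5f;

    for (int i = 0; i < 4; i++) {
      float dx = plus_offsets[i].x - subpixel_offset.x;
      float dy = plus_offsets[i].y - subpixel_offset.y;
      printf("filter_samples_weight[%d] = %f\n", i, filter_weight(radius, dx * dx + dy * dy));
    }
    float center_sq = subpixel_offset.x * subpixel_offset.x + subpixel_offset.y * subpixel_offset.y;
    printf("filter_center_weight = %f\n", filter_weight(radius, center_sq));
    return 0;
  }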
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
index e1c9d3117e3..a11924c3806 100644
--- a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
@@ -29,6 +29,17 @@ class Instance;
/** \name Depth of field
* \{ */
+struct DepthOfFieldBuffer {
+ /**
+ * Per view history texture for stabilize pass.
+ * Swapped with stabilize_output_tx_ in order to reuse the previous history during DoF
+ * processing.
+ * Note this should be private as its inner workings only concern the Depth Of Field
+ * implementation. The view itself should not touch it.
+ */
+ Texture stabilize_history_tx_ = {"dof_taa"};
+};
+
class DepthOfField {
private:
class Instance &inst_;
@@ -58,6 +69,9 @@ class DepthOfField {
Texture reduced_color_tx_ = {"dof_reduced_color"};
/** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */
+ TextureFromPool stabilize_output_tx_ = {"dof_taa"};
+ GPUTexture *stabilize_input_ = nullptr;
+ bool1 stabilize_valid_history_ = false;
int3 dispatch_stabilize_size_ = int3(-1);
DRWPass *stabilize_ps_ = nullptr;
@@ -152,7 +166,7 @@ class DepthOfField {
* Will swap input and output texture if rendering happens. The actual output of this function
* is in input_tx.
*/
- void render(GPUTexture **input_tx, GPUTexture **output_tx);
+ void render(GPUTexture **input_tx, GPUTexture **output_tx, DepthOfFieldBuffer &dof_buffer);
bool postfx_enabled() const
{
@@ -172,6 +186,8 @@ class DepthOfField {
void scatter_pass_sync();
void hole_fill_pass_sync();
void resolve_pass_sync();
+
+ void update_sample_table();
};
/** \} */
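
The DepthOfFieldBuffer / stabilize_output_tx_ pair above is a classic TAA history double-buffer: the pass reads last frame's history, writes a new one, and the two textures are swapped so the fresh result becomes next frame's input. A small C++ sketch of just that ownership pattern, using hypothetical stand-in types rather than the real Texture / TextureFromPool classes:

  #include <string>
  #include <utility>

  /* Hypothetical stand-ins for the draw-module texture types. */
  struct FakeTexture {
    std::string name;
    int last_written_frame = -1;
  };

  struct DofHistoryBuffer {
    FakeTexture stabilize_history{"dof_taa"}; /* Persistent, one per view. */
  };

  static void run_stabilize(DofHistoryBuffer &buf, int frame)
  {
    FakeTexture output{"dof_taa_output"}; /* Acquired from a pool each frame. */
    /* The compute pass reads buf.stabilize_history (previous frame) and writes
     * the stabilized result into `output`. */
    output.last_written_frame = frame;
    /* Swap so this frame's result becomes next frame's history, while the
     * stale history texture takes the pool slot that gets released. */
    std::swap(output, buf.stabilize_history);
  }

  int main()
  {
    DofHistoryBuffer buf;
    for (int frame = 0; frame < 3; frame++) {
      run_stabilize(buf, frame);
    }
    return 0;
  }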
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index 07957cd2c8c..fe36cb1a17c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -372,8 +372,6 @@ struct DepthOfFieldData {
float scatter_color_threshold;
float scatter_neighbor_max_color;
int scatter_sprite_per_row;
- /** Firefly removing factor. */
- float denoise_factor;
/** Number of side the bokeh shape has. */
float bokeh_blades;
/** Rotation of the bokeh shape. */
@@ -384,6 +382,9 @@ struct DepthOfFieldData {
float coc_abs_max;
/** Copy of camera type. */
eCameraType camera_type;
+ /** Weights of spatial filtering in stabilize pass. Not an array to avoid alignment restrictions. */
+ float4 filter_samples_weight;
+ float filter_center_weight;
/** Max number of sprite in the scatter pass for each ground. */
int scatter_max_rect;
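
The comment on filter_samples_weight refers to GPU uniform-buffer packing: under std140-style rules a float[4] member is laid out with a 16-byte stride per element, while a float4 plus a scalar pack tightly. A sketch of the equivalent C++ layout, assuming a 16-byte-aligned float4 like the one used by the shared headers:

  /* 16-byte aligned vector type, mirroring the shared-header float4. */
  struct alignas(16) float4 {
    float x, y, z, w;
  };

  struct DofFilterWeights {
    /* Four plus-shaped neighbor weights packed in one vector (16 bytes) instead
     * of a float[4] array, which std140 would pad out to 64 bytes. */
    float4 filter_samples_weight;
    float filter_center_weight;
    float _pad0, _pad1, _pad2; /* Explicit padding keeps the 16-byte alignment. */
  };

  static_assert(sizeof(DofFilterWeights) == 32, "Tight packing expected.");

  int main() { return 0; }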
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc
index 68c855b9bc5..c195f68380c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_view.cc
@@ -151,7 +151,7 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
- inst_.depth_of_field.render(&input_tx, &output_tx);
+ inst_.depth_of_field.render(&input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(&input_tx, &output_tx);
return input_tx;
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh
index ee169bf418e..65f27aba795 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_view.hh
@@ -44,6 +44,7 @@ class ShadingView {
/** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */
// RaytraceBuffer rt_buffer_opaque_;
// RaytraceBuffer rt_buffer_refract_;
+ DepthOfFieldBuffer dof_buffer_;
Framebuffer prepass_fb_;
Framebuffer combined_fb_;
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
new file mode 100644
index 00000000000..d5fdaae6fc1
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
@@ -0,0 +1,37 @@
+
+/* -------------------------------------------------------------------- */
+/** \name YCoCg
+ * \{ */
+
+vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color)
+{
+ const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */
+ vec3(2, 0, -2), /* Co */
+ vec3(-1, 2, -1))); /* Cg */
+ return colorspace_tx * rgb_color;
+}
+
+vec4 colorspace_YCoCg_from_scene_linear(vec4 rgba_color)
+{
+ return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
+}
+
+vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color)
+{
+ float Y = ycocg_color.x;
+ float Co = ycocg_color.y;
+ float Cg = ycocg_color.z;
+
+ vec3 rgb_color;
+ rgb_color.r = Y + Co - Cg;
+ rgb_color.g = Y + Cg;
+ rgb_color.b = Y - Co - Cg;
+ return rgb_color * 0.25;
+}
+
+vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color)
+{
+ return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a);
+}
+
+/** \} */
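
The new shared YCoCg helpers use the 4x-scaled transform the TAA passes already relied on; it is exactly invertible, which matters because the stabilize history is stored and blended in YCoCg and only converted back to scene linear at the very end. A CPU round-trip check of the same matrix in C++:

  #include <cassert>
  #include <cmath>

  struct vec3 { float x, y, z; };

  /* Same matrix as the GLSL above, written out per component (4x-scaled YCoCg). */
  static vec3 ycocg_from_rgb(vec3 c)
  {
    return {c.x + 2.0f * c.y + c.z,   /* Y  */
            2.0f * c.x - 2.0f * c.z,  /* Co */
            -c.x + 2.0f * c.y - c.z}; /* Cg */
  }

  static vec3 rgb_from_ycocg(vec3 c)
  {
    return {(c.x + c.y - c.z) * 0.25f,
            (c.x + c.z) * 0.25f,
            (c.x - c.y - c.z) * 0.25f};
  }

  int main()
  {
    vec3 rgb = {0.2f, 0.7f, 0.1f};
    vec3 back = rgb_from_ycocg(ycocg_from_rgb(rgb));
    /* The transform round-trips exactly (up to float rounding). */
    assert(std::fabs(back.x - rgb.x) < 1e-6f &&
           std::fabs(back.y - rgb.y) < 1e-6f &&
           std::fabs(back.z - rgb.z) < 1e-6f);
    return 0;
  }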
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
index 88ecaab6a00..bf7c9413da3 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
@@ -15,8 +15,9 @@ struct FilterSample {
/** \name Pixel cache.
* \{ */
-shared vec4 color_cache[10][10];
-shared float weight_cache[10][10];
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float weight_cache[cache_size][cache_size];
void cache_init()
{
@@ -40,11 +41,12 @@ void cache_init()
*/
ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1;
- for (int y = 0; y < 2; y++) {
- for (int x = 0; x < 2; x++) {
- if (all(lessThan(gl_LocalInvocationID.xy, uvec2(5)))) {
- ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + ivec2(x, y) * 5;
- ivec2 load_texel = clamp(texel + ivec2(x, y) * 5, ivec2(0), textureSize(color_tx, 0) - 1);
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1);
color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0);
weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r;
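
The rewritten cache_init() derives the shared-memory tile from gl_WorkGroupSize instead of hard-coding 10x10 / 5x5: only the (cache_size / 2)^2 top-left threads load, each fetching a 2x2 pattern of sub-tiles at a cache_size / 2 stride. A quick CPU check in C++, assuming the 8-wide filter group, that this pattern writes every cache texel exactly once:

  #include <cassert>

  int main()
  {
    constexpr int group = 8;           /* DOF_FILTER_GROUP_SIZE (assumed here). */
    constexpr int cache = group + 2;   /* One border texel on each side. */
    constexpr int loaders = cache / 2; /* Only a 5x5 block of threads loads. */

    int written[cache][cache] = {};
    for (int ty = 0; ty < loaders; ty++) {
      for (int tx = 0; tx < loaders; tx++) {
        /* Each loading thread writes a 2x2 pattern of texels, `loaders` apart. */
        for (int y = 0; y < 2; y++) {
          for (int x = 0; x < 2; x++) {
            written[ty + y * loaders][tx + x * loaders] += 1;
          }
        }
      }
    }
    /* Every cache texel is covered exactly once, with no gaps or overlaps. */
    for (int y = 0; y < cache; y++) {
      for (int x = 0; x < cache; x++) {
        assert(written[y][x] == 1);
      }
    }
    return 0;
  }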
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
index 88a577a1c3c..622b545357e 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
@@ -73,9 +73,10 @@ float fast_luma(vec3 color)
return (2.0 * color.g) + color.r + color.b;
}
-shared vec4 color_cache[8][8];
-shared float coc_cache[8][8];
-shared float do_scatter[8][8];
+const uint cache_size = gl_WorkGroupSize.x;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+shared float do_scatter[cache_size][cache_size];
void main()
{
@@ -200,9 +201,9 @@ void main()
imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);
/* Recursive downsample. */
- for (uint i = 1u; i < DOF_MIP_MAX; i++) {
+ for (uint i = 1u; i < DOF_MIP_COUNT; i++) {
barrier();
- if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_MAX - 1u - i))))) {
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_COUNT - 1u - i))))) {
uvec2 texel_local = gl_LocalInvocationID.xy << i;
/* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
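
With the loop bound switched to DOF_MIP_COUNT, each iteration keeps only a shrinking square of threads active: 1u << (DOF_MIP_COUNT - 1u - i) threads per side produce mip i from the LDS cache. A small C++ sketch printing the active square per mip for the 8x8 reduce group:

  #include <cstdio>

  int main()
  {
    constexpr int DOF_MIP_COUNT = 4;
    constexpr int group = 1 << (DOF_MIP_COUNT - 1); /* DOF_REDUCE_GROUP_SIZE = 8 */
    /* Mip 0 is written by all threads before the loop; each later mip keeps a
     * shrinking square of active threads, matching the lessThan() mask above. */
    for (int i = 1; i < DOF_MIP_COUNT; i++) {
      int active = 1 << (DOF_MIP_COUNT - 1 - i);
      printf("mip %d: %dx%d active threads of %dx%d\n", i, active, active, group, group);
    }
    return 0;
  }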
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
index ac371f76395..254cacc45b7 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
@@ -2,8 +2,11 @@
/**
* Temporal Stabilization of the Depth of field input.
* Corresponds to the TAA pass in the paper.
+ * We actually duplicate the TAA logic but with a few changes:
+ * - We run this pass at half resolution.
+ * - We store CoC instead of Opacity in the alpha channel of the history.
*
- * TODO: This pass needs a cleanup / improvement using much better TAA.
+ * This is an adaptation of the code found in eevee_film_lib.glsl.
*
* Inputs:
* - Output of setup pass (halfres).
@@ -11,54 +14,362 @@
* - Stabilized Color and CoC (halfres).
**/
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
-float fast_luma(vec3 color)
+struct DofSample {
+ vec4 color;
+ float coc;
+};
+
+/* -------------------------------------------------------------------- */
+/** \name LDS Cache
+ * \{ */
+
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+/* Need a 2-pixel border for depth. */
+const uint cache_depth_size = gl_WorkGroupSize.x + 4;
+shared float depth_cache[cache_depth_size][cache_depth_size];
+
+void dof_cache_init()
+{
+ /**
+ * Load enough values into LDS to perform the filter.
+ *
+ * ┌──────────────────────────────┐
+ * │ │ < Border texels that needs to be loaded.
+ * │ x x x x x x x x │ ─┐
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │ Thread Group Size 8x8.
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ ─┘
+ * │ L L L L L │ < Border texels that needs to be loaded.
+ * └──────────────────────────────┘
+ * └───────────┘
+ * Load using 5x5 threads.
+ */
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ /* 1 Pixel border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1);
+
+ vec4 color = texelFetch(color_tx, load_texel, 0);
+ color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color);
+ coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x;
+ }
+ /* 2 Pixels border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ /* Depth is fullres. Load every 2 pixels. */
+ ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1);
+
+ depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x;
+ }
+ }
+ }
+ barrier();
+}
+
+/* Note: Sample color space is already in YCoCg space. */
+DofSample dof_fetch_input_sample(ivec2 offset)
+{
+ ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy);
+ return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]);
+}
+
+float dof_fetch_half_depth(ivec2 offset)
+{
+ ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy);
+ return depth_cache[coord.y][coord.x];
+}
+
+/** \} */
+
+float dof_luma_weight(float luma)
{
- return (2.0 * color.g) + color.r + color.b;
+ /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
+ /* To preserve more details in dark areas, we use a bigger bias. */
+ const float exposure_scale = 1.0; /* TODO. */
+ return 1.0 / (4.0 + luma * exposure_scale);
}
-/* Lightweight version of neighborhood clamping found in TAA. */
-vec3 dof_neighborhood_clamping(vec3 color)
+float dof_bilateral_weight(float reference_coc, float sample_coc)
{
- vec2 texel_size = 1.0 / vec2(textureSize(color_tx, 0));
- vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) * texel_size;
- vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy;
+ /* NOTE: The difference between the CoCs should be inside an abs() function,
+ * but we follow the UE4 implementation to improve how dithered transparency looks (see slide 19).
+ * Compared to dof_bilateral_coc_weights() this saturates at 2x the reference CoC. */
+ return saturate(1.0 - (reference_coc - sample_coc) / max(1.0, abs(reference_coc)));
+}
- /* Luma clamping. 3x3 square neighborhood. */
- float c00 = fast_luma(textureLod(color_tx, uv + ofs.xz, 0.0).rgb);
- float c01 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(1.0, 0.0), 0.0).rgb);
- float c02 = fast_luma(textureLod(color_tx, uv + ofs.xw, 0.0).rgb);
+DofSample dof_spatial_filtering()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ DofSample center = dof_fetch_input_sample(ivec2(0));
+ DofSample accum = DofSample(vec4(0.0), 0.0);
+ float accum_weight = 0.0;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) *
+ dof_bilateral_weight(center.coc, samp.coc);
- float c10 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(0.0, 1.0), 0.0).rgb);
- float c11 = fast_luma(color);
- float c12 = fast_luma(textureLod(color_tx, uv + ofs.xw * vec2(0.0, 1.0), 0.0).rgb);
+ accum.color += samp.color * weight;
+ accum.coc += samp.coc * weight;
+ accum_weight += weight;
+ }
+ /* Accumulate center sample last as it does not need bilateral_weights. */
+ float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x);
+ accum.color += center.color * weight;
+ accum.coc += center.coc * weight;
+ accum_weight += weight;
- float c20 = fast_luma(textureLod(color_tx, uv + ofs.yz, 0.0).rgb);
- float c21 = fast_luma(textureLod(color_tx, uv + ofs.yz * vec2(1.0, 0.0), 0.0).rgb);
- float c22 = fast_luma(textureLod(color_tx, uv + ofs.yw, 0.0).rgb);
+ float rcp_weight = 1.0 / accum_weight;
+ accum.color *= rcp_weight;
+ accum.coc *= rcp_weight;
+ return accum;
+}
- float avg_luma = avg8(c00, c01, c02, c10, c12, c20, c21, c22);
- float max_luma = max8(c00, c01, c02, c10, c12, c20, c21, c22);
+struct DofNeighborhoodMinMax {
+ DofSample min;
+ DofSample max;
+};
- float upper_bound = mix(max_luma, avg_luma, dof_buf.denoise_factor);
- upper_bound = mix(c11, upper_bound, dof_buf.denoise_factor);
+/* Return history clipping bounding box in YCoCg color space. */
+DofNeighborhoodMinMax dof_neighbor_boundbox()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ /**
+ * Simple bounding box calculation in YCoCg as described in:
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
+ */
+ DofSample min_c = dof_fetch_input_sample(ivec2(0));
+ DofSample max_c = min_c;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ min_c.color = min(min_c.color, samp.color);
+ max_c.color = max(max_c.color, samp.color);
+ min_c.coc = min(min_c.coc, samp.coc);
+ max_c.coc = max(max_c.coc, samp.coc);
+ }
+ /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
+ * Round the bbox shape by averaging 2 different min/max from 2 different neighborhoods. */
+ DofSample min_c_3x3 = min_c;
+ DofSample max_c_3x3 = max_c;
+ const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(corners[i]);
+ min_c_3x3.color = min(min_c_3x3.color, samp.color);
+ max_c_3x3.color = max(max_c_3x3.color, samp.color);
+ min_c_3x3.coc = min(min_c_3x3.coc, samp.coc);
+ max_c_3x3.coc = max(max_c_3x3.coc, samp.coc);
+ }
+ min_c.color = (min_c.color + min_c_3x3.color) * 0.5;
+ max_c.color = (max_c.color + max_c_3x3.color) * 0.5;
+ min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5;
+ max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5;
- float clamped_luma = min(upper_bound, c11);
+ return DofNeighborhoodMinMax(min_c, max_c);
+}
- return color * clamped_luma * safe_rcp(c11);
+/* Returns motion in pixel space to retrieve the pixel history. */
+vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
+{
+ /**
+ * Dilate velocity by using the nearest pixel in a cross pattern.
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
+ */
+ const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+ float min_depth = dof_fetch_half_depth(ivec2(0));
+ ivec2 nearest_texel = ivec2(0);
+ for (int i = 0; i < 4; i++) {
+ float depth = dof_fetch_half_depth(corners[i]);
+ if (min_depth > depth) {
+ min_depth = depth;
+ nearest_texel = corners[i];
+ }
+ }
+ /* Convert to full resolution buffer pixel. */
+ ivec2 velocity_texel = (texel_sample + nearest_texel) * 2;
+ velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1);
+ vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth);
+ /* Transform to **half** pixel space. */
+ return vector.xy * vec2(textureSize(color_tx, 0));
+}
+
+/* Load color using a special filter to avoid losing detail.
+ * \a input_texel is the sample position with subpixel accuracy. */
+DofSample dof_sample_history(vec2 input_texel)
+{
+#if 1 /* Bilinear. */
+ vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0);
+ vec4 color = textureLod(in_history_tx, uv, 0.0);
+
+#elif 0 /* Catmull-Rom interpolation. 5 bilinear taps. */
+ vec2 center_texel;
+ vec2 inter_texel = modf(input_texel, center_texel);
+ vec2 weights[4];
+ film_get_catmull_rom_weights(inter_texel, weights);
+
+ /**
+ * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
+ * corner taps.
+ * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
+ * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
+ */
+ center_texel += 0.5;
+
+ /* Slide 92. */
+ vec2 weight_12 = weights[1] + weights[2];
+ vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
+ vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
+ vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;
+
+ vec4 color;
+ vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
+ float weight_center = weight_12.x * weight_12.y;
+
+ color = textureLod(in_history_tx, uv_12, 0.0) * weight_center;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
+ color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
+ color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
+ /* Re-normalize for the removed corners. */
+ color /= (weight_center + sum(weight_cross));
+#endif
+ /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */
+ return DofSample(color.xyzz, color.w);
+}
+
+/* 1D equivalent of line_aabb_clipping_dist(). */
+float dof_aabb_clipping_dist_coc(float origin, float direction, float aabb_min, float aabb_max)
+{
+ if (abs(direction) < 1e-5) {
+ return 0.0;
+ }
+ float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max;
+ return (nearest_plane - origin) / direction;
+}
+
+/* Modulate the history color to avoid ghosting artifacts. */
+DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src)
+{
+ /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
+ DofSample clip_dir;
+ clip_dir.color = src.color - history.color;
+ clip_dir.coc = src.coc - history.coc;
+
+ float t = line_aabb_clipping_dist(
+ history.color.rgb, clip_dir.color.rgb, bbox.min.color.rgb, bbox.max.color.rgb);
+ history.color.rgb += clip_dir.color.rgb * saturate(t);
+
+ /* Clip CoC on its own to avoid interference with the other channels. */
+ float t_a = dof_aabb_clipping_dist_coc(history.coc, clip_dir.coc, bbox.min.coc, bbox.max.coc);
+ history.coc += clip_dir.coc * saturate(t_a);
+
+ return history;
+}
+
+float dof_history_blend_factor(
+ float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst)
+{
+ float luma_min = bbox.min.color.x;
+ float luma_max = bbox.max.color.x;
+ float luma_incoming = src.color.x;
+ float luma_history = dst.color.x;
+
+ /* 5% of incoming color by default. */
+ float blend = 0.05;
+ /* Blend less history if the pixel has substantial velocity. */
+ /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */
+ blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0));
+ /**
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
+ * Bias towards history if the incoming pixel is near clamping. Reduces flicker.
+ */
+ float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
+ /* Divide by bbox size to get a factor. Factor of 2 to compensate for the line above. */
+ distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
+ /* Linearly blend when history gets below 25% of the bbox size. */
+ blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
+ /* Progressively discard history until history CoC is twice as big as the filtered CoC.
+ * Note we use the absolute diff here because we are not comparing neighbors and thus do not risk
+ * dilating thin features like hair (slide 19). */
+ float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc)));
+ blend = mix(blend, 1.0, coc_diff_ratio);
+ /* Discard out of view history. */
+ if (any(lessThan(texel, vec2(0))) ||
+ any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) {
+ blend = 1.0;
+ }
+ /* Discard history if invalid. */
+ if (use_history == false) {
+ blend = 1.0;
+ }
+ return blend;
}
void main()
{
- vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(textureSize(color_tx, 0).xy);
- vec4 out_color = textureLod(color_tx, uv, 0.0);
- float out_coc = textureLod(coc_tx, uv, 0.0).r;
+ dof_cache_init();
+
+ ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy);
+
+ /**
+ * Naming convention is taken from the film implementation.
+ * SRC is incoming new data.
+ * DST is history data.
+ */
+ DofSample src = dof_spatial_filtering();
+
+ /* Reproject by finding where this pixel was in the previous frame. */
+ vec2 motion = dof_pixel_history_motion_vector(src_texel);
+ vec2 history_texel = vec2(src_texel) + motion;
+
+ float velocity = length(motion);
+
+ DofSample dst = dof_sample_history(history_texel);
+
+ /* Get local color bounding box of source neighborhood. */
+ DofNeighborhoodMinMax bbox = dof_neighbor_boundbox();
+
+ float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst);
+
+ dst = dof_amend_history(bbox, dst, src);
+
+ /* Luma weighted blend to reduce flickering. */
+ float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend);
+ float weight_src = dof_luma_weight(src.color.x) * (blend);
+
+ DofSample result;
+ /* Weighted blend. */
+ result.color = vec4(dst.color.rgb, dst.coc) * weight_dst +
+ vec4(src.color.rgb, src.coc) * weight_src;
+ result.color /= weight_src + weight_dst;
+
+ /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */
+ imageStore(out_history_img, src_texel, result.color);
+
+ /* Un-swizzle. */
+ result.coc = result.color.a;
+ /* Clamp opacity since we don't store it in history. */
+ result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a);
- out_color.rgb = dof_neighborhood_clamping(out_color.rgb);
- /* TODO(fclem): Stabilize CoC. */
+ result.color = colorspace_scene_linear_from_YCoCg(result.color);
- ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
- imageStore(out_color_img, out_texel, out_color);
- imageStore(out_coc_img, out_texel, vec4(out_coc));
+ imageStore(out_color_img, src_texel, result.color);
+ imageStore(out_coc_img, src_texel, vec4(result.coc));
}
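
The final blend mirrors the film accumulation: both source and history are weighted by an inverse-luma factor on top of the history blend factor, so a single bright sample cannot dominate the half-res history. A scalar C++ sketch of that last step, on the luma channel only and assuming exposure_scale = 1.0 and the default 5% blend:

  #include <cstdio>

  /* Scalar version of dof_luma_weight() above, with exposure_scale assumed 1.0.
   * In the shader, color and CoC blend with these same weights. */
  static float luma_weight(float luma)
  {
    return 1.0f / (4.0f + luma);
  }

  int main()
  {
    float src_luma = 8.0f; /* Bright incoming sample (e.g. a firefly). */
    float dst_luma = 1.0f; /* Stable history value. */
    float blend = 0.05f;   /* Default 5% of incoming color. */

    float w_dst = luma_weight(dst_luma) * (1.0f - blend);
    float w_src = luma_weight(src_luma) * blend;
    float result = (dst_luma * w_dst + src_luma * w_src) / (w_dst + w_src);

    /* The inverse-luma weights pull the result below a plain 5% lerp, so a
     * single bright sample flickers less in the half-res history. */
    printf("plain lerp: %f, luma-weighted: %f\n",
           dst_luma + (src_luma - dst_luma) * blend, result);
    return 0;
  }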
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
index 08027f2ef6c..bf6293d5561 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
@@ -7,6 +7,7 @@
#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
/* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */
float film_depth_convert_to_scene(float depth)
@@ -18,32 +19,6 @@ float film_depth_convert_to_scene(float depth)
return abs(get_view_z_from_depth(depth));
}
-vec3 film_YCoCg_from_scene_linear(vec3 rgb_color)
-{
- const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */
- vec3(2, 0, -2), /* Co */
- vec3(-1, 2, -1))); /* Cg */
- return colorspace_tx * rgb_color;
-}
-
-vec4 film_YCoCg_from_scene_linear(vec4 rgba_color)
-{
- return vec4(film_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
-}
-
-vec3 film_scene_linear_from_YCoCg(vec3 ycocg_color)
-{
- float Y = ycocg_color.x;
- float Co = ycocg_color.y;
- float Cg = ycocg_color.z;
-
- vec3 rgb_color;
- rgb_color.r = Y + Co - Cg;
- rgb_color.g = Y + Cg;
- rgb_color.b = Y - Co - Cg;
- return rgb_color * 0.25;
-}
-
/* Load a texture sample in a specific format. Combined pass needs to use this. */
vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
{
@@ -51,7 +26,7 @@ vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
/* Convert transmittance to opacity. */
color.a = saturate(1.0 - color.a);
/* Transform to YCoCg for accumulation. */
- color.rgb = film_YCoCg_from_scene_linear(color.rgb);
+ color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb);
return color;
}
@@ -220,7 +195,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
ivec2 nearest_texel = texel_sample;
for (int i = 0; i < 4; i++) {
- ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy);
+ ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1);
float depth = texelFetch(depth_tx, texel, 0).x;
if (min_depth > depth) {
min_depth = depth;
@@ -455,7 +430,7 @@ void film_store_combined(
// dst.weight = film_weight_load(texel_combined);
color_dst = film_sample_catmull_rom(in_combined_tx, history_texel);
- color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
/* Get local color bounding box of source neighborhood. */
vec4 min_color, max_color;
@@ -473,7 +448,7 @@ void film_store_combined(
else {
/* Everything is static. Use render accumulation. */
color_dst = texelFetch(in_combined_tx, dst.texel, 0);
- color_dst.rgb = film_YCoCg_from_scene_linear(color_dst.rgb);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
/* Luma weighted blend to avoid flickering. */
weight_dst = film_luma_weight(color_dst.x) * dst.weight;
@@ -483,7 +458,7 @@ void film_store_combined(
color = color_dst * weight_dst + color_src * weight_src;
color /= weight_src + weight_dst;
- color.rgb = film_scene_linear_from_YCoCg(color.rgb);
+ color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb);
/* Fix alpha not accumulating to 1 because of float imprecision. */
if (color.a > 0.995) {
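
Besides moving the YCoCg helpers into the shared library, this hunk fixes an off-by-one in the velocity-dilation clamp: texel coordinates are valid in [0, textureSize - 1], so clamping to textureSize let border threads fetch one texel past the edge. A tiny C++ illustration of the fix:

  #include <algorithm>
  #include <cassert>

  int main()
  {
    const int size = 128;
    const int texel = 127 + 2; /* Border pixel plus the +2 dilation offset. */
    /* Old upper bound `size` allows an out-of-range fetch; `size - 1` does not. */
    const int old_clamp = std::clamp(texel, 0, size);
    const int new_clamp = std::clamp(texel, 0, size - 1);
    assert(old_clamp == size && new_clamp == size - 1);
    return 0;
  }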
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
index c95c0877c88..1dd9178ae84 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
@@ -29,14 +29,18 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
.do_static_compilation(true)
- .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
- .additional_info("eevee_shared", "draw_view")
- .uniform_buf(1, "DepthOfFieldData", "dof_buf")
- .sampler(0, ImageType::DEPTH_2D, "coc_tx")
+ .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+ .uniform_buf(4, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::FLOAT_2D, "coc_tx")
.sampler(1, ImageType::FLOAT_2D, "color_tx")
- // .sampler(2, ImageType::FLOAT_2D, "velocity_tx") /* TODO: TAA with reprojection. */
+ .sampler(2, ImageType::FLOAT_2D, "velocity_tx")
+ .sampler(3, ImageType::FLOAT_2D, "in_history_tx")
+ .sampler(4, ImageType::DEPTH_2D, "depth_tx")
+ .push_constant(Type::BOOL, "use_history")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img")
.compute_source("eevee_depth_of_field_stabilize_comp.glsl");
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample)