diff options
Diffstat (limited to 'source/blender/draw')
299 files changed, 22316 insertions, 3931 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 9cb3743dd02..e6b532ed25a 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -23,11 +23,11 @@ set(INC ../nodes ../render ../render/intern + ../compositor/realtime_compositor ../windowmanager ../../../intern/atomic ../../../intern/clog - ../../../intern/glew-mx ../../../intern/guardedalloc ../../../intern/opensubdiv @@ -67,28 +67,27 @@ set(SRC intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc intern/mesh_extractors/extract_mesh_vbo_tan.cc intern/mesh_extractors/extract_mesh_vbo_uv.cc - intern/mesh_extractors/extract_mesh_vbo_vcol.cc intern/mesh_extractors/extract_mesh_vbo_weights.cc intern/draw_attributes.cc intern/draw_cache_impl_curve.cc intern/draw_cache_impl_curves.cc - intern/draw_cache_impl_displist.c intern/draw_cache_impl_gpencil.c intern/draw_cache_impl_lattice.c intern/draw_cache_impl_mesh.cc - intern/draw_cache_impl_metaball.c intern/draw_cache_impl_particles.c - intern/draw_cache_impl_pointcloud.c + intern/draw_cache_impl_pointcloud.cc intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc + intern/draw_command.cc intern/draw_common.c intern/draw_curves.cc - intern/draw_debug.c + intern/draw_debug.cc intern/draw_fluid.c intern/draw_hair.cc intern/draw_instance_data.c intern/draw_manager.c + intern/draw_manager.cc intern/draw_manager_data.c intern/draw_manager_exec.c intern/draw_manager_profiling.c @@ -104,6 +103,7 @@ set(SRC intern/smaa_textures.c engines/basic/basic_engine.c engines/basic/basic_shader.c + engines/compositor/compositor_engine.cc engines/image/image_engine.cc engines/image/image_shader.cc engines/eevee/eevee_bloom.c @@ -135,10 +135,18 @@ set(SRC engines/eevee/eevee_temporal_sampling.c engines/eevee/eevee_volumes.c engines/eevee_next/eevee_camera.cc + engines/eevee_next/eevee_cryptomatte.cc + engines/eevee_next/eevee_depth_of_field.cc 
engines/eevee_next/eevee_engine.cc + engines/eevee_next/eevee_film.cc + engines/eevee_next/eevee_hizbuffer.cc engines/eevee_next/eevee_instance.cc + engines/eevee_next/eevee_light.cc engines/eevee_next/eevee_material.cc + engines/eevee_next/eevee_motion_blur.cc engines/eevee_next/eevee_pipeline.cc + engines/eevee_next/eevee_renderbuffers.cc + engines/eevee_next/eevee_sampling.cc engines/eevee_next/eevee_shader.cc engines/eevee_next/eevee_sync.cc engines/eevee_next/eevee_velocity.cc @@ -191,6 +199,7 @@ set(SRC engines/overlay/overlay_paint.c engines/overlay/overlay_particle.c engines/overlay/overlay_sculpt.c + engines/overlay/overlay_sculpt_curves.cc engines/overlay/overlay_shader.c engines/overlay/overlay_volume.c engines/overlay/overlay_wireframe.c @@ -205,31 +214,58 @@ set(SRC intern/draw_cache_impl.h intern/draw_cache_inline.h intern/draw_color_management.h + intern/draw_command.hh intern/draw_common.h intern/draw_common_shader_shared.h intern/draw_curves_private.h intern/draw_debug.h + intern/draw_debug.hh intern/draw_hair_private.h + intern/draw_handle.hh intern/draw_instance_data.h intern/draw_manager.h + intern/draw_manager.hh intern/draw_manager_profiling.h intern/draw_manager_testing.h intern/draw_manager_text.h + intern/draw_pass.hh + intern/draw_resource.cc + intern/draw_resource.hh intern/draw_shader.h intern/draw_shader_shared.h + intern/draw_state.h intern/draw_subdivision.h intern/draw_texture_pool.h + intern/draw_view.cc intern/draw_view.h + intern/draw_view.hh intern/draw_view_data.h intern/mesh_extractors/extract_mesh.hh intern/smaa_textures.h engines/basic/basic_engine.h engines/basic/basic_private.h + engines/compositor/compositor_engine.h engines/eevee/eevee_engine.h engines/eevee/eevee_lightcache.h engines/eevee/eevee_lut.h engines/eevee/eevee_private.h + engines/eevee_next/eevee_camera.hh + engines/eevee_next/eevee_depth_of_field.hh engines/eevee_next/eevee_engine.h + engines/eevee_next/eevee_film.hh + engines/eevee_next/eevee_hizbuffer.hh + 
engines/eevee_next/eevee_instance.hh + engines/eevee_next/eevee_light.hh + engines/eevee_next/eevee_material.hh + engines/eevee_next/eevee_motion_blur.hh + engines/eevee_next/eevee_pipeline.hh + engines/eevee_next/eevee_renderbuffers.hh + engines/eevee_next/eevee_sampling.hh + engines/eevee_next/eevee_shader.hh + engines/eevee_next/eevee_sync.hh + engines/eevee_next/eevee_velocity.hh + engines/eevee_next/eevee_view.hh + engines/eevee_next/eevee_world.hh engines/external/external_engine.h engines/image/image_batches.hh engines/image/image_buffer_cache.hh @@ -256,6 +292,7 @@ set(SRC set(LIB bf_blenkernel bf_blenlib + bf_realtime_compositor bf_windowmanager ) @@ -336,6 +373,7 @@ set(GLSL_SRC engines/eevee/shaders/raytrace_lib.glsl engines/eevee/shaders/renderpass_lib.glsl engines/eevee/shaders/renderpass_postprocess_frag.glsl + engines/eevee/shaders/cryptomatte_lib.glsl engines/eevee/shaders/cryptomatte_frag.glsl engines/eevee/shaders/cryptomatte_vert.glsl engines/eevee/shaders/ltc_lib.glsl @@ -350,6 +388,7 @@ set(GLSL_SRC engines/eevee/shaders/volumetric_frag.glsl engines/eevee/shaders/volumetric_geom.glsl engines/eevee/shaders/volumetric_vert.glsl + engines/eevee/shaders/volumetric_resolve_comp.glsl engines/eevee/shaders/volumetric_resolve_frag.glsl engines/eevee/shaders/volumetric_scatter_frag.glsl engines/eevee/shaders/volumetric_integration_frag.glsl @@ -357,18 +396,54 @@ set(GLSL_SRC engines/eevee_next/shaders/eevee_attributes_lib.glsl engines/eevee_next/shaders/eevee_camera_lib.glsl + engines/eevee_next/shaders/eevee_colorspace_lib.glsl + engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl + 
engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl + engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl + engines/eevee_next/shaders/eevee_film_comp.glsl + engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl + engines/eevee_next/shaders/eevee_film_frag.glsl + engines/eevee_next/shaders/eevee_film_lib.glsl engines/eevee_next/shaders/eevee_geom_curves_vert.glsl engines/eevee_next/shaders/eevee_geom_gpencil_vert.glsl engines/eevee_next/shaders/eevee_geom_mesh_vert.glsl engines/eevee_next/shaders/eevee_geom_world_vert.glsl + engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl + engines/eevee_next/shaders/eevee_hiz_update_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl + engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl + engines/eevee_next/shaders/eevee_light_eval_lib.glsl + engines/eevee_next/shaders/eevee_light_iter_lib.glsl + engines/eevee_next/shaders/eevee_light_lib.glsl + engines/eevee_next/shaders/eevee_ltc_lib.glsl + engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl + engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl + engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl + 
engines/eevee_next/shaders/eevee_motion_blur_lib.glsl engines/eevee_next/shaders/eevee_nodetree_lib.glsl + engines/eevee_next/shaders/eevee_sampling_lib.glsl engines/eevee_next/shaders/eevee_surf_deferred_frag.glsl engines/eevee_next/shaders/eevee_surf_depth_frag.glsl engines/eevee_next/shaders/eevee_surf_forward_frag.glsl engines/eevee_next/shaders/eevee_surf_lib.glsl engines/eevee_next/shaders/eevee_surf_world_frag.glsl engines/eevee_next/shaders/eevee_velocity_lib.glsl - engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl engines/eevee_next/eevee_defines.hh engines/eevee_next/eevee_shader_shared.hh @@ -403,22 +478,25 @@ set(GLSL_SRC engines/workbench/workbench_shader_shared.h + intern/shaders/common_aabb_lib.glsl intern/shaders/common_attribute_lib.glsl intern/shaders/common_colormanagement_lib.glsl + intern/shaders/common_debug_draw_lib.glsl + intern/shaders/common_debug_print_lib.glsl + intern/shaders/common_debug_shape_lib.glsl + intern/shaders/common_fullscreen_vert.glsl + intern/shaders/common_fxaa_lib.glsl intern/shaders/common_globals_lib.glsl intern/shaders/common_gpencil_lib.glsl - intern/shaders/common_pointcloud_lib.glsl intern/shaders/common_hair_lib.glsl - intern/shaders/common_hair_refine_vert.glsl intern/shaders/common_hair_refine_comp.glsl - intern/shaders/common_math_lib.glsl + intern/shaders/common_hair_refine_vert.glsl + intern/shaders/common_intersect_lib.glsl intern/shaders/common_math_geom_lib.glsl - intern/shaders/common_view_clipping_lib.glsl - intern/shaders/common_view_lib.glsl - intern/shaders/common_fxaa_lib.glsl + intern/shaders/common_math_lib.glsl + intern/shaders/common_pointcloud_lib.glsl + intern/shaders/common_shape_lib.glsl intern/shaders/common_smaa_lib.glsl - intern/shaders/common_fullscreen_vert.glsl - intern/shaders/common_subdiv_custom_data_interp_comp.glsl intern/shaders/common_subdiv_ibo_lines_comp.glsl intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -431,8 +509,20 @@ set(GLSL_SRC 
intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl intern/shaders/common_subdiv_vbo_lnor_comp.glsl intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl - + intern/shaders/common_view_clipping_lib.glsl + intern/shaders/common_view_lib.glsl + intern/shaders/draw_command_generate_comp.glsl + intern/shaders/draw_debug_draw_display_frag.glsl + intern/shaders/draw_debug_draw_display_vert.glsl + intern/shaders/draw_debug_info.hh + intern/shaders/draw_debug_print_display_frag.glsl + intern/shaders/draw_debug_print_display_vert.glsl + intern/shaders/draw_resource_finalize_comp.glsl + intern/shaders/draw_visibility_comp.glsl + + intern/draw_command_shared.hh intern/draw_common_shader_shared.h + intern/draw_defines.h intern/draw_shader_shared.h engines/gpencil/shaders/gpencil_frag.glsl @@ -454,6 +544,7 @@ set(GLSL_SRC engines/basic/shaders/basic_conservative_depth_geom.glsl engines/basic/shaders/basic_depth_vert.glsl + engines/basic/shaders/basic_depth_curves_vert.glsl engines/basic/shaders/basic_depth_pointcloud_vert.glsl engines/basic/shaders/basic_depth_frag.glsl @@ -531,6 +622,7 @@ set(GLSL_SRC engines/overlay/shaders/overlay_motion_path_line_vert.glsl engines/overlay/shaders/overlay_motion_path_point_vert.glsl engines/overlay/shaders/overlay_outline_detect_frag.glsl + engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl engines/overlay/shaders/overlay_outline_prepass_frag.glsl engines/overlay/shaders/overlay_outline_prepass_geom.glsl engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl @@ -550,6 +642,8 @@ set(GLSL_SRC engines/overlay/shaders/overlay_particle_vert.glsl engines/overlay/shaders/overlay_point_varying_color_frag.glsl engines/overlay/shaders/overlay_point_varying_color_varying_outline_aa_frag.glsl + engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl + engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl engines/overlay/shaders/overlay_sculpt_mask_frag.glsl 
engines/overlay/shaders/overlay_sculpt_mask_vert.glsl engines/overlay/shaders/overlay_uniform_color_frag.glsl @@ -633,6 +727,7 @@ add_dependencies(bf_draw bf_dna) if(WITH_GTESTS) if(WITH_OPENGL_DRAW_TESTS) set(TEST_SRC + tests/draw_pass_test.cc tests/draw_testing.cc tests/shaders_test.cc diff --git a/source/blender/draw/engines/basic/basic_engine.c b/source/blender/draw/engines/basic/basic_engine.c index 04a3c27959d..975d9e299bf 100644 --- a/source/blender/draw/engines/basic/basic_engine.c +++ b/source/blender/draw/engines/basic/basic_engine.c @@ -53,6 +53,7 @@ typedef struct BASIC_PrivateData { DRWShadingGroup *depth_shgrp[2]; DRWShadingGroup *depth_shgrp_cull[2]; DRWShadingGroup *depth_hair_shgrp[2]; + DRWShadingGroup *depth_curves_shgrp[2]; DRWShadingGroup *depth_pointcloud_shgrp[2]; bool use_material_slot_selection; } BASIC_PrivateData; /* Transient data */ @@ -99,6 +100,9 @@ static void basic_cache_init(void *vedata) stl->g_data->depth_hair_shgrp[i] = grp = DRW_shgroup_create( BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]); + stl->g_data->depth_curves_shgrp[i] = grp = DRW_shgroup_create( + BASIC_shaders_curves_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]); + sh = DRW_state_is_select() ? BASIC_shaders_depth_conservative_sh_get(draw_ctx->sh_cfg) : BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg); state |= DRW_STATE_CULL_BACK; @@ -156,8 +160,12 @@ static void basic_cache_populate(void *vedata, Object *ob) basic_cache_populate_particles(vedata, ob); } - /* Make flat object selectable in ortho view if wireframe is enabled. */ const bool do_in_front = (ob->dtx & OB_DRAW_IN_FRONT) != 0; + if (ob->type == OB_CURVES) { + DRW_shgroup_curves_create_sub(ob, stl->g_data->depth_curves_shgrp[do_in_front], NULL); + } + + /* Make flat object selectable in ortho view if wireframe is enabled. 
*/ if ((draw_ctx->v3d->overlay.flag & V3D_OVERLAY_WIREFRAMES) || (draw_ctx->v3d->shading.type == OB_WIRE) || (ob->dtx & OB_DRAWWIRE) || (ob->dt == OB_WIRE)) { int flat_axis = 0; diff --git a/source/blender/draw/engines/basic/basic_private.h b/source/blender/draw/engines/basic/basic_private.h index 22b458baca2..197831b9ee8 100644 --- a/source/blender/draw/engines/basic/basic_private.h +++ b/source/blender/draw/engines/basic/basic_private.h @@ -11,6 +11,7 @@ extern "C" { GPUShader *BASIC_shaders_depth_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config); +GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_pointcloud_depth_conservative_sh_get(eGPUShaderConfig config); void BASIC_shaders_free(void); diff --git a/source/blender/draw/engines/basic/basic_shader.c b/source/blender/draw/engines/basic/basic_shader.c index 3d40c627fff..5b7636ca9fd 100644 --- a/source/blender/draw/engines/basic/basic_shader.c +++ b/source/blender/draw/engines/basic/basic_shader.c @@ -24,6 +24,7 @@ typedef struct BASIC_Shaders { /* Depth Pre Pass */ struct GPUShader *depth; struct GPUShader *pointcloud_depth; + struct GPUShader *curves_depth; struct GPUShader *depth_conservative; struct GPUShader *pointcloud_depth_conservative; } BASIC_Shaders; @@ -53,6 +54,16 @@ GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config) return sh_data->pointcloud_depth; } +GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config) +{ + BASIC_Shaders *sh_data = &e_data.sh_data[config]; + if (sh_data->curves_depth == NULL) { + sh_data->curves_depth = GPU_shader_create_from_info_name( + config == GPU_SHADER_CFG_CLIPPED ? 
"basic_depth_curves_clipped" : "basic_depth_curves"); + } + return sh_data->curves_depth; +} + GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config) { BASIC_Shaders *sh_data = &e_data.sh_data[config]; diff --git a/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl new file mode 100644 index 00000000000..b0da9754fc6 --- /dev/null +++ b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl @@ -0,0 +1,27 @@ + +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +void main() +{ + GPU_INTEL_VERTEX_SHADER_WORKAROUND + + bool is_persp = (ProjectionMatrix[3][3] == 0.0); + float time, thick_time, thickness; + vec3 world_pos, tan, binor; + hair_get_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + ViewMatrixInverse[3].xyz, + ViewMatrixInverse[2].xyz, + world_pos, + tan, + binor, + time, + thickness, + thick_time); + + gl_Position = point_world_to_ndc(world_pos); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh index bae50eb48fa..561cef0e442 100644 --- a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh +++ b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh @@ -27,6 +27,9 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud) .vertex_source("basic_depth_pointcloud_vert.glsl") .additional_info("draw_pointcloud"); +GPU_SHADER_CREATE_INFO(basic_curves) + .vertex_source("basic_depth_curves_vert.glsl") + .additional_info("draw_hair"); /** \} */ /* -------------------------------------------------------------------- */ @@ -46,7 +49,8 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud) #define BASIC_OBTYPE_VARIATIONS(prefix, ...) 
\ BASIC_CONSERVATIVE_VARIATIONS(prefix##_mesh, "basic_mesh", __VA_ARGS__) \ - BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__) + BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__) \ + BASIC_CLIPPING_VARIATIONS(prefix##_curves, "basic_curves", __VA_ARGS__) /** \} */ diff --git a/source/blender/draw/engines/compositor/compositor_engine.cc b/source/blender/draw/engines/compositor/compositor_engine.cc new file mode 100644 index 00000000000..f36a59a4ce6 --- /dev/null +++ b/source/blender/draw/engines/compositor/compositor_engine.cc @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_listbase.h" +#include "BLI_math_vec_types.hh" +#include "BLI_string_ref.hh" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "DNA_ID_enums.h" +#include "DNA_scene_types.h" + +#include "DEG_depsgraph_query.h" + +#include "DRW_render.h" + +#include "IMB_colormanagement.h" + +#include "COM_context.hh" +#include "COM_evaluator.hh" +#include "COM_texture_pool.hh" + +#include "GPU_texture.h" + +namespace blender::draw::compositor { + +class TexturePool : public realtime_compositor::TexturePool { + public: + GPUTexture *allocate_texture(int2 size, eGPUTextureFormat format) override + { + DrawEngineType *owner = (DrawEngineType *)this; + return DRW_texture_pool_query_2d(size.x, size.y, format, owner); + } +}; + +class Context : public realtime_compositor::Context { + private: + /* A pointer to the info message of the compositor engine. This is a char array of size + * GPU_INFO_SIZE. The message is cleared prior to updating or evaluating the compositor. 
*/ + char *info_message_; + + public: + Context(realtime_compositor::TexturePool &texture_pool, char *info_message) + : realtime_compositor::Context(texture_pool), info_message_(info_message) + { + } + + const Scene *get_scene() const override + { + return DRW_context_state_get()->scene; + } + + int2 get_output_size() override + { + return int2(float2(DRW_viewport_size_get())); + } + + GPUTexture *get_output_texture() override + { + return DRW_viewport_texture_list_get()->color; + } + + GPUTexture *get_input_texture(int UNUSED(view_layer), eScenePassType UNUSED(pass_type)) override + { + return get_output_texture(); + } + + StringRef get_view_name() override + { + const SceneRenderView *view = static_cast<SceneRenderView *>( + BLI_findlink(&get_scene()->r.views, DRW_context_state_get()->v3d->multiview_eye)); + return view->name; + } + + void set_info_message(StringRef message) const override + { + message.copy(info_message_, GPU_INFO_SIZE); + } +}; + +class Engine { + private: + TexturePool texture_pool_; + Context context_; + realtime_compositor::Evaluator evaluator_; + /* Stores the viewport size at the time the last compositor evaluation happened. See the + * update_viewport_size method for more information. */ + int2 last_viewport_size_; + + public: + Engine(char *info_message) + : context_(texture_pool_, info_message), + evaluator_(context_, node_tree()), + last_viewport_size_(context_.get_output_size()) + { + } + + /* Update the viewport size and evaluate the compositor. */ + void draw() + { + update_viewport_size(); + evaluator_.evaluate(); + } + + /* If the size of the viewport changed from the last time the compositor was evaluated, update + * the viewport size and reset the evaluator. That's because the evaluator compiles the node tree + * in a manner that is specifically optimized for the size of the viewport. This should be called + * before evaluating the compositor. 
*/ + void update_viewport_size() + { + if (last_viewport_size_ == context_.get_output_size()) { + return; + } + + last_viewport_size_ = context_.get_output_size(); + + evaluator_.reset(); + } + + /* If the compositor node tree changed, reset the evaluator. */ + void update(const Depsgraph *depsgraph) + { + if (DEG_id_type_updated(depsgraph, ID_NT)) { + evaluator_.reset(); + } + } + + /* Get a reference to the compositor node tree. */ + static bNodeTree &node_tree() + { + return *DRW_context_state_get()->scene->nodetree; + } +}; + +} // namespace blender::draw::compositor + +using namespace blender::draw::compositor; + +struct COMPOSITOR_Data { + DrawEngineType *engine_type; + DRWViewportEmptyList *fbl; + DRWViewportEmptyList *txl; + DRWViewportEmptyList *psl; + DRWViewportEmptyList *stl; + Engine *instance_data; + char info[GPU_INFO_SIZE]; +}; + +static void compositor_engine_init(void *data) +{ + COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + + if (!compositor_data->instance_data) { + compositor_data->instance_data = new Engine(compositor_data->info); + } +} + +static void compositor_engine_free(void *instance_data) +{ + Engine *engine = static_cast<Engine *>(instance_data); + delete engine; +} + +static void compositor_engine_draw(void *data) +{ + const COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + compositor_data->instance_data->draw(); +} + +static void compositor_engine_update(void *data) +{ + COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + + /* Clear any info message that was set in a previous update. 
*/ + compositor_data->info[0] = '\0'; + + if (compositor_data->instance_data) { + compositor_data->instance_data->update(DRW_context_state_get()->depsgraph); + } +} + +extern "C" { + +static const DrawEngineDataSize compositor_data_size = DRW_VIEWPORT_DATA_SIZE(COMPOSITOR_Data); + +DrawEngineType draw_engine_compositor_type = { + nullptr, /* next */ + nullptr, /* prev */ + N_("Compositor"), /* idname */ + &compositor_data_size, /* vedata_size */ + &compositor_engine_init, /* engine_init */ + nullptr, /* engine_free */ + &compositor_engine_free, /* instance_free */ + nullptr, /* cache_init */ + nullptr, /* cache_populate */ + nullptr, /* cache_finish */ + &compositor_engine_draw, /* draw_scene */ + &compositor_engine_update, /* view_update */ + nullptr, /* id_update */ + nullptr, /* render_to_image */ + nullptr, /* store_metadata */ +}; +} diff --git a/source/blender/draw/engines/compositor/compositor_engine.h b/source/blender/draw/engines/compositor/compositor_engine.h new file mode 100644 index 00000000000..5de0de8a0b3 --- /dev/null +++ b/source/blender/draw/engines/compositor/compositor_engine.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +extern DrawEngineType draw_engine_compositor_type; + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/engines/eevee/eevee_bloom.c b/source/blender/draw/engines/eevee/eevee_bloom.c index d12ce7213f9..4528027a9ea 100644 --- a/source/blender/draw/engines/eevee/eevee_bloom.c +++ b/source/blender/draw/engines/eevee/eevee_bloom.c @@ -125,7 +125,8 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name, struct GPUShader *sh, DRWPass **pass, bool upsample, - bool resolve) + bool resolve, + bool resolve_add_base) { struct GPUBatch *quad = DRW_cache_fullscreen_quad_get(); @@ -141,7 +142,7 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name, } if (resolve) { DRW_shgroup_uniform_vec3(grp, "bloomColor", 
effects->bloom_color, 1); - DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", true); + DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", resolve_add_base); } return grp; @@ -193,18 +194,21 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_downsample_get(use_antiflicker), &psl->bloom_downsample_first, false, + false, false); eevee_create_bloom_pass("Bloom Downsample", effects, EEVEE_shaders_bloom_downsample_get(false), &psl->bloom_downsample, false, + false, false); eevee_create_bloom_pass("Bloom Upsample", effects, EEVEE_shaders_bloom_upsample_get(use_highres), &psl->bloom_upsample, true, + false, false); grp = eevee_create_bloom_pass("Bloom Blit", @@ -212,6 +216,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_blit_get(use_antiflicker), &psl->bloom_blit, false, + false, false); DRW_shgroup_uniform_vec4(grp, "curveThreshold", effects->bloom_curve_threshold, 1); DRW_shgroup_uniform_float(grp, "clampIntensity", &effects->bloom_clamp, 1); @@ -221,6 +226,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_resolve_get(use_highres), &psl->bloom_resolve, true, + true, true); } } @@ -304,13 +310,13 @@ void EEVEE_bloom_output_init(EEVEE_ViewLayerData *UNUSED(sldata), {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(txl->bloom_accum)}); /* Create Pass and shgroup. 
*/ - DRWShadingGroup *grp = eevee_create_bloom_pass("Bloom Accumulate", - effects, - EEVEE_shaders_bloom_resolve_get(use_highres), - &psl->bloom_accum_ps, - true, - true); - DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", false); + eevee_create_bloom_pass("Bloom Accumulate", + effects, + EEVEE_shaders_bloom_resolve_get(use_highres), + &psl->bloom_accum_ps, + true, + true, + false); } void EEVEE_bloom_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata) diff --git a/source/blender/draw/engines/eevee/eevee_cryptomatte.c b/source/blender/draw/engines/eevee/eevee_cryptomatte.c index 33063e14c03..d805a039e8f 100644 --- a/source/blender/draw/engines/eevee/eevee_cryptomatte.c +++ b/source/blender/draw/engines/eevee/eevee_cryptomatte.c @@ -25,7 +25,6 @@ * they take into account to create the render passes. When accurate mode is off the number of * levels is used as the number of cryptomatte samples to take. When accuracy mode is on the number * of render samples is used. 
- * */ #include "DRW_engine.h" @@ -94,7 +93,7 @@ BLI_INLINE int eevee_cryptomatte_pixel_stride(const ViewLayer *view_layer) /** \} */ /* -------------------------------------------------------------------- */ -/** \name Init Renderpasses +/** \name Init Render-Passes * \{ */ void EEVEE_cryptomatte_renderpasses_init(EEVEE_Data *vedata) @@ -249,7 +248,9 @@ void EEVEE_cryptomatte_object_curves_cache_populate(EEVEE_Data *vedata, { BLI_assert(ob->type == OB_CURVES); Material *material = BKE_object_material_get_eval(ob, CURVES_MATERIAL_NR); - eevee_cryptomatte_curves_cache_populate(vedata, sldata, ob, NULL, NULL, material); + DRWShadingGroup *grp = eevee_cryptomatte_shading_group_create( + vedata, sldata, ob, material, true); + DRW_shgroup_curves_create_sub(ob, grp, NULL); } void EEVEE_cryptomatte_particle_hair_cache_populate(EEVEE_Data *vedata, @@ -420,27 +421,31 @@ void EEVEE_cryptomatte_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EE void EEVEE_cryptomatte_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) { + /* NOTE: Name channels lowercase rgba so that compression rules check in OpenEXR DWA code uses + * lossless compression. Reportedly this naming is the only one which works good from the + * interoperability point of view. Using XYZW naming is not portable. 
*/ + char cryptomatte_pass_name[MAX_NAME]; const short num_passes = eevee_cryptomatte_passes_per_layer(view_layer); if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_OBJECT) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoObject%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_MATERIAL) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoMaterial%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_ASSET) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoAsset%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } } diff --git a/source/blender/draw/engines/eevee/eevee_engine.c b/source/blender/draw/engines/eevee/eevee_engine.c index 227757bad23..5ae4b730cfa 100644 --- a/source/blender/draw/engines/eevee/eevee_engine.c +++ b/source/blender/draw/engines/eevee/eevee_engine.c @@ -109,7 +109,7 @@ void EEVEE_cache_populate(void *vedata, Object *ob) } if (DRW_object_is_renderable(ob) && (ob_visibility & OB_VISIBLE_SELF)) { - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ob->type == OB_MESH) { EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow); } else if (ob->type == OB_CURVES) { @@ -312,12 +312,12 @@ static void eevee_draw_scene(void *vedata) /* Volumetrics Resolve Opaque */ EEVEE_volumes_resolve(sldata, vedata); - /* 
Renderpasses */ + /* Render-passes. */ EEVEE_renderpasses_output_accumulate(sldata, vedata, false); /* Transparent */ - /* TODO(fclem): should be its own Frame-buffer. - * This is needed because dualsource blending only works with 1 color buffer. */ + /* TODO(@fclem): should be its own Frame-buffer. + * This is needed because dual-source blending only works with 1 color buffer. */ GPU_framebuffer_texture_attach(fbl->main_color_fb, dtxl->depth, 0, 0); GPU_framebuffer_bind(fbl->main_color_fb); DRW_draw_pass(psl->transparent_pass); @@ -366,7 +366,7 @@ static void eevee_draw_scene(void *vedata) static void eevee_view_update(void *vedata) { EEVEE_StorageList *stl = ((EEVEE_Data *)vedata)->stl; - if (stl->g_data) { + if (stl && stl->g_data) { stl->g_data->view_updated = true; } } @@ -451,8 +451,8 @@ static void eevee_render_to_image(void *vedata, } EEVEE_PrivateData *g_data = ved->stl->g_data; - int initial_frame = CFRA; - float initial_subframe = SUBFRA; + int initial_frame = scene->r.cfra; + float initial_subframe = scene->r.subframe; float shuttertime = (do_motion_blur) ? scene->eevee.motion_blur_shutter : 0.0f; int time_steps_tot = (do_motion_blur) ? max_ii(1, scene->eevee.motion_blur_steps) : 1; g_data->render_timesteps = time_steps_tot; @@ -588,7 +588,7 @@ static void eevee_render_to_image(void *vedata, /* Restore original viewport size. */ DRW_render_viewport_size_set((int[2]){g_data->size_orig[0], g_data->size_orig[1]}); - if (CFRA != initial_frame || SUBFRA != initial_subframe) { + if (scene->r.cfra != initial_frame || scene->r.subframe != initial_subframe) { /* Restore original frame number. This is because the render pipeline expects it. 
*/ RE_engine_frame_set(engine, initial_frame, initial_subframe); } diff --git a/source/blender/draw/engines/eevee/eevee_lightcache.c b/source/blender/draw/engines/eevee/eevee_lightcache.c index 7f722ff1764..614ea0b0892 100644 --- a/source/blender/draw/engines/eevee/eevee_lightcache.c +++ b/source/blender/draw/engines/eevee/eevee_lightcache.c @@ -849,7 +849,7 @@ static void eevee_lightbake_delete_resources(EEVEE_LightBake *lbake) DRW_opengl_context_enable(); } - /* XXX Free the resources contained in the viewlayer data + /* XXX: Free the resources contained in the view-layer data * to be able to free the context before deleting the depsgraph. */ if (lbake->sldata) { EEVEE_view_layer_data_free(lbake->sldata); diff --git a/source/blender/draw/engines/eevee/eevee_materials.c b/source/blender/draw/engines/eevee/eevee_materials.c index efd27c19654..94f29d64628 100644 --- a/source/blender/draw/engines/eevee/eevee_materials.c +++ b/source/blender/draw/engines/eevee/eevee_materials.c @@ -806,7 +806,7 @@ void EEVEE_materials_cache_populate(EEVEE_Data *vedata, !DRW_state_is_image_render(); /* First get materials for this mesh. */ - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ELEM(ob->type, OB_MESH, OB_SURF)) { const int materials_len = DRW_cache_object_material_count_get(ob); EeveeMaterialCache *matcache = BLI_array_alloca(matcache, materials_len); diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h index ad218d80cdf..573c29b78a1 100644 --- a/source/blender/draw/engines/eevee/eevee_private.h +++ b/source/blender/draw/engines/eevee/eevee_private.h @@ -1015,7 +1015,7 @@ typedef struct EEVEE_PrivateData { struct GHash *material_hash; float background_alpha; /* TODO: find a better place for this. */ bool disable_ligthprobes; - /* Chosen lightcache: can come from Lookdev or the viewlayer. */ + /** Chosen light-cache: can come from Lookdev or the view-layer. 
*/ struct LightCache *light_cache; /* For planar probes */ float planar_texel_size[2]; @@ -1050,7 +1050,7 @@ typedef struct EEVEE_PrivateData { float studiolight_glossy_clamp; float studiolight_filter_quality; - /* Renderpasses */ + /* Render-passes */ /* Bitmask containing the active render_passes */ eViewLayerEEVEEPassType render_passes; uint aov_hash; @@ -1261,6 +1261,7 @@ struct GPUShader *EEVEE_shaders_volumes_scatter_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_scatter_with_lights_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_integration_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_resolve_sh_get(bool accum); +struct GPUShader *EEVEE_shaders_volumes_resolve_comp_sh_get(bool float_target); struct GPUShader *EEVEE_shaders_volumes_accum_sh_get(void); struct GPUShader *EEVEE_shaders_ggx_lut_sh_get(void); struct GPUShader *EEVEE_shaders_ggx_refraction_lut_sh_get(void); diff --git a/source/blender/draw/engines/eevee/eevee_render.c b/source/blender/draw/engines/eevee/eevee_render.c index bef19c589c2..c3b909f5fb9 100644 --- a/source/blender/draw/engines/eevee/eevee_render.c +++ b/source/blender/draw/engines/eevee/eevee_render.c @@ -24,6 +24,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_framebuffer.h" #include "GPU_state.h" @@ -223,7 +224,7 @@ void EEVEE_render_cache(void *vedata, } if (ob_visibility & OB_VISIBLE_SELF) { - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ob->type == OB_MESH) { EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow); if (do_cryptomatte) { EEVEE_cryptomatte_cache_populate(data, sldata, ob); @@ -646,6 +647,10 @@ void EEVEE_render_draw(EEVEE_Data *vedata, RenderEngine *engine, RenderLayer *rl /* XXX Seems to fix TDR issue with NVidia drivers on linux. */ GPU_finish(); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. 
*/ + GPU_render_step(); + RE_engine_update_progress(engine, (float)(render_samples++) / (float)tot_sample); } } diff --git a/source/blender/draw/engines/eevee/eevee_sampling.c b/source/blender/draw/engines/eevee/eevee_sampling.c index a1a3e98f34f..34d3cd74b36 100644 --- a/source/blender/draw/engines/eevee/eevee_sampling.c +++ b/source/blender/draw/engines/eevee/eevee_sampling.c @@ -74,7 +74,8 @@ void EEVEE_sample_ellipse(int sample_ofs, BLI_halton_2d(ht_primes, ht_offset, sample_ofs, ht_point); - /* Decorelate AA and shadow samples. (see T68594) */ + /* Decorrelate AA and shadow samples. (see T68594) */ + ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0); ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0); @@ -97,7 +98,7 @@ void EEVEE_random_rotation_m4(int sample_ofs, float scale, float r_mat[4][4]) BLI_halton_3d(ht_primes, ht_offset, sample_ofs, ht_point); - /* Decorelate AA and shadow samples. (see T68594) */ + /* Decorrelate AA and shadow samples. (see T68594) */ ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0); ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0); ht_point[2] = fmod(ht_point[2] * 1151.0, 1.0); diff --git a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c index 5af794c9158..0d0e551f3dc 100644 --- a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c +++ b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c @@ -198,7 +198,7 @@ void EEVEE_reflection_compute(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *v if (((effects->enabled_effects & EFFECT_SSR) != 0) && stl->g_data->valid_double_buffer) { DRW_stats_group_start("SSR"); - /* Raytrace. */ + /* Ray-trace. 
*/ GPU_framebuffer_bind(fbl->screen_tracing_fb); DRW_draw_pass(psl->ssr_raytrace); diff --git a/source/blender/draw/engines/eevee/eevee_shaders.c b/source/blender/draw/engines/eevee/eevee_shaders.c index 5709621fc05..a7290b3894e 100644 --- a/source/blender/draw/engines/eevee/eevee_shaders.c +++ b/source/blender/draw/engines/eevee/eevee_shaders.c @@ -133,6 +133,7 @@ static struct { struct GPUShader *scatter_with_lights_sh; struct GPUShader *volumetric_integration_sh; struct GPUShader *volumetric_resolve_sh[2]; + struct GPUShader *volumetric_resolve_comp_sh[2]; struct GPUShader *volumetric_accum_sh; /* Shader strings */ @@ -181,6 +182,7 @@ extern char datatoc_closure_type_lib_glsl[]; extern char datatoc_closure_eval_volume_lib_glsl[]; extern char datatoc_common_uniforms_lib_glsl[]; extern char datatoc_common_utiltex_lib_glsl[]; +extern char datatoc_cryptomatte_lib_glsl[]; extern char datatoc_cryptomatte_frag_glsl[]; extern char datatoc_cryptomatte_vert_glsl[]; extern char datatoc_cubemap_lib_glsl[]; @@ -260,6 +262,7 @@ extern char datatoc_volumetric_frag_glsl[]; extern char datatoc_volumetric_geom_glsl[]; extern char datatoc_volumetric_integration_frag_glsl[]; extern char datatoc_volumetric_lib_glsl[]; +extern char datatoc_volumetric_resolve_comp_glsl[]; extern char datatoc_volumetric_resolve_frag_glsl[]; extern char datatoc_volumetric_scatter_frag_glsl[]; extern char datatoc_volumetric_vert_glsl[]; @@ -304,6 +307,7 @@ static void eevee_shader_library_ensure(void) DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_refraction_lib); DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_surface_lib); DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_volume_lib); + DRW_SHADER_LIB_ADD(e_data.lib, cryptomatte_lib); DRW_SHADER_LIB_ADD(e_data.lib, surface_vert); e_data.surface_lit_frag = DRW_shader_library_create_shader_string(e_data.lib, @@ -901,6 +905,20 @@ struct GPUShader *EEVEE_shaders_volumes_resolve_sh_get(bool accum) return e_data.volumetric_resolve_sh[index]; } +struct GPUShader 
*EEVEE_shaders_volumes_resolve_comp_sh_get(bool float_target) +{ + const int index = (float_target ? 1 : 0); + if (e_data.volumetric_resolve_comp_sh[index] == NULL) { + e_data.volumetric_resolve_comp_sh[index] = DRW_shader_create_compute_with_shaderlib( + datatoc_volumetric_resolve_comp_glsl, + e_data.lib, + float_target ? "#define TARGET_IMG_FLOAT\n" SHADER_DEFINES : SHADER_DEFINES, + __func__); + } + + return e_data.volumetric_resolve_comp_sh[index]; +} + struct GPUShader *EEVEE_shaders_volumes_accum_sh_get() { if (e_data.volumetric_accum_sh == NULL) { @@ -1190,8 +1208,8 @@ Material *EEVEE_material_default_diffuse_get(void) if (!e_data.diffuse_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default diffuse"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_DIFFUSE); @@ -1217,8 +1235,8 @@ Material *EEVEE_material_default_glossy_get(void) if (!e_data.glossy_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default metal"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_GLOSSY); @@ -1246,8 +1264,8 @@ Material *EEVEE_material_default_error_get(void) if (!e_data.error_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default error"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; /* Use emission and output material to be compatible with both World and Material. 
*/ diff --git a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c index 536242f67d8..a3ab4cdb830 100644 --- a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c +++ b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c @@ -357,7 +357,7 @@ static void eevee_shadow_cascade_setup(EEVEE_LightsInfo *linfo, mul_m4_m4m4(csm_data->shadowmat[c], texcomat, viewprojmat); #ifdef DEBUG_CSM - DRW_debug_m4_as_bbox(viewprojmat, dbg_col, true); + DRW_debug_m4_as_bbox(viewprojmat, true, dbg_col); #endif } diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c index b8bef61f8b1..b2e5a0abe94 100644 --- a/source/blender/draw/engines/eevee/eevee_volumes.c +++ b/source/blender/draw/engines/eevee/eevee_volumes.c @@ -30,6 +30,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_texture.h" #include "eevee_private.h" @@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size)); tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1); + /* Clamp 3D texture size based on device maximum. */ + int maxSize = GPU_max_texture_3d_size(); + BLI_assert(tex_size[0] <= maxSize); + tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0]; + tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1]; + tex_size[2] = tex_size[2] > maxSize ? 
maxSize : tex_size[2]; + common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]); common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]); common_data->vol_coord_scale[2] = 1.0f / viewport_size[0]; @@ -306,9 +314,14 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata, return; } + GPUShader *sh = GPU_material_get_shader(mat); + if (sh == NULL) { + return; + } + /* TODO(fclem): Reuse main shading group to avoid shading binding cost just like for surface * shaders. */ - DRWShadingGroup *grp = DRW_shgroup_material_create(mat, vedata->psl->volumetric_objects_ps); + DRWShadingGroup *grp = DRW_shgroup_create(sh, vedata->psl->volumetric_objects_ps); grp = DRW_shgroup_volume_create_sub(scene, ob, grp, mat); @@ -316,6 +329,8 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata, return; } + DRW_shgroup_add_material_resources(grp, mat); + /* TODO(fclem): remove those "unnecessary" UBOs */ DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo); DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); @@ -381,18 +396,37 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) grp, NULL, USE_VOLUME_OPTI ? 
1 : common_data->vol_tex_size[2]); DRW_PASS_CREATE(psl->volumetric_resolve_ps, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); - grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_sh_get(false), - psl->volumetric_resolve_ps); - DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); - DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); - DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); - DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); - DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); - DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); - DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); - DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + if (GPU_compute_shader_support() && GPU_shader_image_load_store_support()) { + const bool use_float_target = DRW_state_is_image_render(); + grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_comp_sh_get(use_float_target), + psl->volumetric_resolve_ps); + DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); + DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); + DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); + DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); + DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); + DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); + DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); + DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + DRW_shgroup_uniform_image_ref(grp, "target_img", &txl->color); - DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + const float *size = DRW_viewport_size_get(); + DRW_shgroup_call_compute(grp, size[0], size[1], 1); + } + else { + grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_sh_get(false), + 
psl->volumetric_resolve_ps); + DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); + DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); + DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); + DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); + DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); + DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); + DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); + DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + } } } @@ -531,11 +565,16 @@ void EEVEE_volumes_resolve(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *veda } /* Apply for opaque geometry. */ - GPU_framebuffer_bind(fbl->main_color_fb); - DRW_draw_pass(psl->volumetric_resolve_ps); + if (GPU_compute_shader_support() && GPU_shader_image_load_store_support()) { + DRW_draw_pass(psl->volumetric_resolve_ps); + } + else { + GPU_framebuffer_bind(fbl->main_color_fb); + DRW_draw_pass(psl->volumetric_resolve_ps); - /* Restore. */ - GPU_framebuffer_bind(fbl->main_fb); + /* Restore. */ + GPU_framebuffer_bind(fbl->main_fb); + } } } diff --git a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl index 0f5290a7c07..ffca97b6b8f 100644 --- a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl @@ -181,6 +181,8 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection) /* Glue with the old system. */ CLOSURE_VARS_DECLARE_2(Diffuse, Glossy); + /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */ + in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? 
diffuse.sss_radius.r : 1.0; in_Diffuse_0.N = diffuse.N; in_Diffuse_0.albedo = diffuse.color; in_Glossy_1.N = reflection.N; @@ -207,6 +209,8 @@ Closure closure_eval(ClosureDiffuse diffuse, /* Glue with the old system. */ CLOSURE_VARS_DECLARE_3(Diffuse, Glossy, Glossy); + /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */ + in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? diffuse.sss_radius.r : 1.0; in_Diffuse_0.N = diffuse.N; in_Diffuse_0.albedo = diffuse.color; in_Glossy_1.N = reflection.N; diff --git a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl index 4070ede116b..eeccb393a5c 100644 --- a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl @@ -6,8 +6,8 @@ #ifndef VOLUMETRICS -uniform int outputSsrId; /*Default = 1;*/ -uniform int outputSssId; /*Default = 1;*/ +uniform int outputSsrId; /* Default = 1; */ +uniform int outputSssId; /* Default = 1; */ #endif diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl new file mode 100644 index 00000000000..0f8810ff7ac --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl @@ -0,0 +1,19 @@ +/* NOTE: this lib is included in the cryptomatte vertex shader to work around the issue that eevee + * cannot use create infos for its static shaders. Keep in sync with draw_shader_shared.h */ +#ifdef HAIR_SHADER +/* Define the maximum number of attribute we allow in a curves UBO. + * This should be kept in sync with `GPU_ATTR_MAX` */ +# define DRW_ATTRIBUTE_PER_CURVES_MAX 15 + +struct CurvesInfos { + /* Per attribute scope, follows loading order. + * NOTE: uint as bool in GLSL is 4 bytes. + * NOTE: GLSL pad arrays of scalar to 16 bytes (std140). 
*/ + uvec4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; +}; +layout(std140) uniform drw_curves +{ + CurvesInfos _drw_curves; +}; +# define drw_curves (_drw_curves) +#endif diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl index f8dbc4772e9..14fbc98469a 100644 --- a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl @@ -3,4 +3,5 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_attribute_lib.glsl) +#pragma BLENDER_REQUIRE(cryptomatte_lib.glsl) #pragma BLENDER_REQUIRE(surface_vert.glsl) diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl index 688ae4915e1..7dec30a96b1 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl @@ -124,7 +124,7 @@ void dof_slight_focus_gather(float radius, out vec4 out_color, out float out_wei dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); - /* Fix weighting issues on perfectly focus > slight focus transitionning areas. */ + /* Fix weighting issues on perfectly focus > slight focus transitioning areas. 
*/ if (abs(center_data.coc) < 0.5) { bg_col = center_data.color; bg_weight = 1.0; diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl index 06dcbeaed66..7230758a93f 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl @@ -67,7 +67,7 @@ void main(void) /* Occlude the sprite with geometry from the same field * using a VSM like chebychev test (slide 85). */ float mean = occlusion_data.x; - float variance = occlusion_data.x; + float variance = occlusion_data.y; shapes *= variance * safe_rcp(variance + sqr(max(cocs * correction_fac - mean, 0.0))); } diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl index 9ecc50d9df5..c7f6687d2e2 100644 --- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl @@ -100,7 +100,7 @@ void main() coef = 0.315392 * (3.0 * cubevec.y * cubevec.y - 1.0) * 1.0 / 4.0; } else if (comp == 7) { - coef = 1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0; + coef = -1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0; } else { /* (comp == 8) */ coef = 0.546274 * (cubevec.x * cubevec.x - cubevec.z * cubevec.z) * 1.0 / 4.0; diff --git a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl index 15c68dc5829..87e944a2ac0 100644 --- a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl @@ -91,3 +91,17 @@ void main() } #endif } + +/* Passthrough. 
*/ +float attr_load_temperature_post(float attr) +{ + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + return attr; +} +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl index 2926f8c5a89..062a40f35c2 100644 --- a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl @@ -73,7 +73,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = hairStrandID; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { id = hair_get_base_id(); } @@ -152,3 +152,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl index ace6c7d788d..88755705a53 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl @@ -152,7 +152,8 @@ void main() /* Only supported attrib for world/background shaders. */ vec3 attr_load_orco(vec4 orco) { - return g_data.P; + /* Retain precision better than g_data.P (see T99128). */ + return -normal_view_to_world(viewCameraVec(viewPosition)); } /* Unsupported. 
*/ vec4 attr_load_tangent(vec4 tangent) @@ -181,3 +182,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl index 8e1bafe8d92..69762027643 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl @@ -97,11 +97,12 @@ GlobalData init_globals(void) GlobalData surf; # if defined(WORLD_BACKGROUND) || defined(PROBE_CAPTURE) - surf.P = -cameraVec(worldPosition); - surf.N = surf.Ng = -surf.P; + surf.P = transform_direction(ViewMatrixInverse, -viewCameraVec(viewPosition)); + surf.N = surf.Ng = surf.Ni = -surf.P; surf.ray_length = 0.0; # else surf.P = worldPosition; + surf.Ni = worldNormal; surf.N = safe_normalize(worldNormal); surf.Ng = safe_normalize(cross(dFdx(surf.P), dFdy(surf.P))); surf.ray_length = distance(surf.P, cameraPos); @@ -109,6 +110,7 @@ GlobalData init_globals(void) surf.barycentric_coords = vec2(0.0); surf.barycentric_dists = vec3(0.0); surf.N = (FrontFacing) ? surf.N : -surf.N; + surf.Ni = (FrontFacing) ? surf.Ni : -surf.Ni; # ifdef HAIR_SHADER vec3 V = cameraVec(surf.P); /* Shade as a cylinder. */ @@ -123,7 +125,7 @@ GlobalData init_globals(void) cos_theta = hairThickTime / hairThickness; } float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta)); - surf.N = safe_normalize(worldNormal * sin_theta + B * cos_theta); + surf.N = surf.Ni = safe_normalize(worldNormal * sin_theta + B * cos_theta); surf.curve_T = -hairTangent; /* Costly, but follows cycles per pixel tangent space (not following curve shape). 
*/ surf.curve_B = cross(V, surf.curve_T); diff --git a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl index a8e95e13b12..54aad7891dc 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl @@ -80,7 +80,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = hairStrandID; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { id = hair_get_base_id(); } @@ -165,3 +165,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl index 88ade8451a4..9ed21fc0bf5 100644 --- a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl @@ -86,6 +86,8 @@ void main() discard; return; } +#else /* WORLD_SHADER */ + volumeOrco = worldPosition; #endif #ifdef CLEAR @@ -176,3 +178,7 @@ vec4 attr_load_color_post(vec4 attr) #endif return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl new file mode 100644 index 00000000000..2b0139ff923 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl @@ -0,0 +1,38 @@ + +#pragma BLENDER_REQUIRE(volumetric_lib.glsl) + +/* Based on Frosbite Unified Volumetric. + * https://www.ea.com/frostbite/news/physically-based-unified-volumetric-rendering-in-frostbite */ + +/* Step 4 : Apply final integration on top of the scene color. 
*/ + +uniform sampler2D inSceneDepth; + +layout(local_size_x = 1, local_size_y = 1) in; + +#ifdef TARGET_IMG_FLOAT +layout(binding = 0, rgba32f) uniform image2D target_img; +#else +layout(binding = 0, rgba16f) uniform image2D target_img; +#endif + +void main() +{ + ivec2 co = ivec2(gl_GlobalInvocationID.xy); + vec2 uvs = co / vec2(textureSize(inSceneDepth, 0)); + float scene_depth = texture(inSceneDepth, uvs).r; + + vec3 transmittance, scattering; + volumetric_resolve(uvs, scene_depth, transmittance, scattering); + + /* Approximate volume alpha by using a monochromatic transmittance + * and adding it to the scene alpha. */ + float alpha = dot(transmittance, vec3(1.0 / 3.0)); + + vec4 color0 = vec4(scattering, 1.0 - alpha); + vec4 color1 = vec4(transmittance, alpha); + + vec4 color_in = imageLoad(target_img, co); + vec4 color_out = color0 + color1 * color_in; + imageStore(target_img, co, color_out); +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl index b3b9c7af19c..2d51fbd9edc 100644 --- a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl @@ -87,3 +87,8 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } + +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.cc b/source/blender/draw/engines/eevee_next/eevee_camera.cc index e6d2e2db764..b9040f0f3ab 100644 --- a/source/blender/draw/engines/eevee_next/eevee_camera.cc +++ b/source/blender/draw/engines/eevee_next/eevee_camera.cc @@ -29,10 +29,8 @@ namespace blender::eevee { void Camera::init() { const Object *camera_eval = inst_.camera_eval_object; - synced_ = false; - data_.swap(); - CameraData &data = data_.current(); + CameraData &data = data_; if (camera_eval) { const ::Camera *cam = reinterpret_cast<const ::Camera *>(camera_eval->data); @@ -77,9 
+75,8 @@ void Camera::init() void Camera::sync() { const Object *camera_eval = inst_.camera_eval_object; - CameraData &data = data_.current(); - data.filter_size = inst_.scene->r.gauss; + CameraData &data = data_; if (inst_.drw_view) { DRW_view_viewmat_get(inst_.drw_view, data.viewmat.ptr(), false); @@ -127,6 +124,10 @@ void Camera::sync() data.equirect_scale *= data.uv_scale; data.equirect_scale_inv = 1.0f / data.equirect_scale; +#else + data.fisheye_fov = data.fisheye_lens = -1.0f; + data.equirect_bias = float2(0.0f); + data.equirect_scale = float2(0.0f); #endif } else if (inst_.drw_view) { @@ -137,14 +138,8 @@ void Camera::sync() data.equirect_scale = float2(0.0f); } - data_.current().push_update(); - - synced_ = true; - - /* Detect changes in parameters. */ - if (data_.current() != data_.previous()) { - // inst_.sampling.reset(); - } + data_.initialized = true; + data_.push_update(); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.hh b/source/blender/draw/engines/eevee_next/eevee_camera.hh index dfec738b1f3..49f9b14e11b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_camera.hh +++ b/source/blender/draw/engines/eevee_next/eevee_camera.hh @@ -61,8 +61,7 @@ inline bool operator==(const CameraData &a, const CameraData &b) return compare_m4m4(a.persmat.ptr(), b.persmat.ptr(), FLT_MIN) && (a.uv_scale == b.uv_scale) && (a.uv_bias == b.uv_bias) && (a.equirect_scale == b.equirect_scale) && (a.equirect_bias == b.equirect_bias) && (a.fisheye_fov == b.fisheye_fov) && - (a.fisheye_lens == b.fisheye_lens) && (a.filter_size == b.filter_size) && - (a.type == b.type); + (a.fisheye_lens == b.fisheye_lens) && (a.type == b.type); } inline bool operator!=(const CameraData &a, const CameraData &b) @@ -83,10 +82,7 @@ class Camera { private: Instance &inst_; - /** Double buffered to detect changes and have history for re-projection. */ - SwapChain<CameraDataBuf, 2> data_; - /** Detects wrong usage. 
*/ - bool synced_ = false; + CameraDataBuf data_; public: Camera(Instance &inst) : inst_(inst){}; @@ -100,28 +96,32 @@ class Camera { **/ const CameraData &data_get() const { - BLI_assert(synced_); - return data_.current(); + BLI_assert(data_.initialized); + return data_; } const GPUUniformBuf *ubo_get() const { - return data_.current(); + return data_; } bool is_panoramic() const { - return eevee::is_panoramic(data_.current().type); + return eevee::is_panoramic(data_.type); } bool is_orthographic() const { - return data_.current().type == CAMERA_ORTHO; + return data_.type == CAMERA_ORTHO; + } + bool is_perspective() const + { + return data_.type == CAMERA_PERSP; } const float3 &position() const { - return *reinterpret_cast<const float3 *>(data_.current().viewinv[3]); + return *reinterpret_cast<const float3 *>(data_.viewinv[3]); } const float3 &forward() const { - return *reinterpret_cast<const float3 *>(data_.current().viewinv[2]); + return *reinterpret_cast<const float3 *>(data_.viewinv[2]); } }; diff --git a/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc new file mode 100644 index 00000000000..10be121f533 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
 */ +#include "BKE_cryptomatte.hh" + +#include "GPU_material.h" + +#include "eevee_cryptomatte.hh" +#include "eevee_instance.hh" +#include "eevee_renderbuffers.hh" + +namespace blender::eevee { + +void Cryptomatte::begin_sync() +{ + const eViewLayerEEVEEPassType enabled_passes = static_cast<eViewLayerEEVEEPassType>( + inst_.film.enabled_passes_get() & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET | + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)); + + session_.reset(); + object_layer_ = nullptr; + asset_layer_ = nullptr; + material_layer_ = nullptr; + + if (enabled_passes && !inst_.is_viewport()) { + session_.reset(BKE_cryptomatte_init_from_view_layer(inst_.view_layer)); + + for (const std::string &layer_name : + bke::cryptomatte::BKE_cryptomatte_layer_names_get(*session_)) { + StringRef layer_name_ref = layer_name; + bke::cryptomatte::CryptomatteLayer *layer = bke::cryptomatte::BKE_cryptomatte_layer_get( + *session_, layer_name); + if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_OBJECT)) { + object_layer_ = layer; + } + else if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_ASSET)) { + asset_layer_ = layer; + } + else if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_MATERIAL)) { + material_layer_ = layer; + } + } + } + + if (!(enabled_passes & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET))) { + cryptomatte_object_buf.resize(16); + } +} + +void Cryptomatte::sync_object(Object *ob, ResourceHandle res_handle) +{ + const eViewLayerEEVEEPassType enabled_passes = inst_.film.enabled_passes_get(); + if (!(enabled_passes & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET))) { + return; + } + + uint32_t resource_id = res_handle.resource_index(); + float2 object_hashes(0.0f, 0.0f); + + if (enabled_passes & EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT) { + object_hashes[0] = register_id(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT, ob->id); + } + + if (enabled_passes & 
EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET) { + Object *asset = ob; + while (asset->parent) { + asset = asset->parent; + } + object_hashes[1] = register_id(EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET, asset->id); + } + cryptomatte_object_buf.get_or_resize(resource_id) = object_hashes; +} + +void Cryptomatte::sync_material(const ::Material *material) +{ + /* Material crypto hashes are generated during shader codegen stage. We only need to register + * them to store inside the metadata. */ + if (material_layer_ && material) { + material_layer_->add_ID(material->id); + } +} + +void Cryptomatte::end_sync() +{ + cryptomatte_object_buf.push_update(); + + object_layer_ = nullptr; + asset_layer_ = nullptr; + material_layer_ = nullptr; +} + +float Cryptomatte::register_id(const eViewLayerEEVEEPassType layer, const ID &id) const +{ + BLI_assert(ELEM(layer, + EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT, + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)); + + uint32_t cryptomatte_hash = 0; + if (session_) { + if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT) { + BLI_assert(object_layer_); + cryptomatte_hash = object_layer_->add_ID(id); + } + else if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET) { + BLI_assert(asset_layer_); + cryptomatte_hash = asset_layer_->add_ID(id); + } + else if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL) { + BLI_assert(material_layer_); + cryptomatte_hash = material_layer_->add_ID(id); + } + } + else { + const char *name = &id.name[2]; + const int name_len = BLI_strnlen(name, MAX_NAME - 2); + cryptomatte_hash = BKE_cryptomatte_hash(name, name_len); + } + + return BKE_cryptomatte_hash_to_float(cryptomatte_hash); +} + +void Cryptomatte::store_metadata(RenderResult *render_result) +{ + if (session_) { + BKE_cryptomatte_store_metadata(&*session_, render_result, inst_.view_layer); + } +} + +} // namespace blender::eevee
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh new file mode 100644 index 00000000000..86ab3d97b4b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup eevee + * + * Cryptomatte. + * + * During rasterization, cryptomatte hashes are stored into a single array texture. + * The film pass then resamples this texture using pixel filter weighting. + * Each cryptomatte layer can hold N samples. These are stored in sequential layers + * of the array texture. The samples are sorted and merged only for final rendering. + */ + +#pragma once + +#include "eevee_shader_shared.hh" + +#include "BKE_cryptomatte.hh" + +extern "C" { +struct Material; +struct CryptomatteSession; +} + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Cryptomatte + * \{ */ + +class Cryptomatte { + private: + class Instance &inst_; + + bke::cryptomatte::CryptomatteSessionPtr session_; + + /* Cached pointer to the cryptomatte layer instances. */ + bke::cryptomatte::CryptomatteLayer *object_layer_ = nullptr; + bke::cryptomatte::CryptomatteLayer *asset_layer_ = nullptr; + bke::cryptomatte::CryptomatteLayer *material_layer_ = nullptr; + + /** Contains per object hashes (object and asset hash). Indexed by resource ID. 
*/ + CryptomatteObjectBuf cryptomatte_object_buf; + + public: + Cryptomatte(Instance &inst) : inst_(inst){}; + + void begin_sync(); + void sync_object(Object *ob, ResourceHandle res_handle); + void sync_material(const ::Material *material); + void end_sync(); + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + pass->bind_ssbo(CRYPTOMATTE_BUF_SLOT, &cryptomatte_object_buf); + } + + /* Register ID to use inside cryptomatte layer and returns associated hash as float. */ + float register_id(const eViewLayerEEVEEPassType layer, const ID &id) const; + void store_metadata(RenderResult *render_result); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index f75ebd2bd13..248dfae6df9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -11,12 +11,18 @@ #pragma once -/** - * Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536. - * Current limiting factor is the sorting phase which is single pass and only sort within a - * thread-group which maximum size is 1024. - */ -#define CULLING_BATCH_SIZE 1024 +/* Hierarchical Z down-sampling. */ +#define HIZ_MIP_COUNT 8 +/* NOTE: The shader is written to update 5 mipmaps using LDS. */ +#define HIZ_GROUP_SIZE 32 + +/* Avoid too much overhead caused by resizing the light buffers too many time. */ +#define LIGHT_CHUNK 256 + +#define CULLING_SELECT_GROUP_SIZE 256 +#define CULLING_SORT_GROUP_SIZE 256 +#define CULLING_ZBIN_GROUP_SIZE 1024 +#define CULLING_TILE_GROUP_SIZE 1024 /** * IMPORTANT: Some data packing are tweaked for these values. @@ -34,12 +40,68 @@ #define SHADOW_MAX_PAGE 4096 #define SHADOW_PAGE_PER_ROW 64 -#define HIZ_MIP_COUNT 6u -/* Group size is 2x smaller because we simply copy the level 0. */ -#define HIZ_GROUP_SIZE 1u << (HIZ_MIP_COUNT - 2u) - +/* Ray-tracing. 
*/ #define RAYTRACE_GROUP_SIZE 16 #define RAYTRACE_MAX_TILES (16384 / RAYTRACE_GROUP_SIZE) * (16384 / RAYTRACE_GROUP_SIZE) /* Minimum visibility size. */ #define LIGHTPROBE_FILTER_VIS_GROUP_SIZE 16 + +/* Film. */ +#define FILM_GROUP_SIZE 16 + +/* Motion Blur. */ +#define MOTION_BLUR_GROUP_SIZE 32 +#define MOTION_BLUR_DILATE_GROUP_SIZE 512 + +/* Depth Of Field. */ +#define DOF_TILES_SIZE 8 +#define DOF_TILES_FLATTEN_GROUP_SIZE DOF_TILES_SIZE +#define DOF_TILES_DILATE_GROUP_SIZE 8 +#define DOF_BOKEH_LUT_SIZE 32 +#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5 +#define DOF_SLIGHT_FOCUS_SAMPLE_MAX 16 +#define DOF_MIP_COUNT 4 +#define DOF_REDUCE_GROUP_SIZE (1 << (DOF_MIP_COUNT - 1)) +#define DOF_DEFAULT_GROUP_SIZE 32 +#define DOF_STABILIZE_GROUP_SIZE 16 +#define DOF_FILTER_GROUP_SIZE 8 +#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE +#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2) + +/* Resource bindings. */ + +/* Texture. */ +#define RBUFS_UTILITY_TEX_SLOT 14 + +/* Images. */ +#define RBUFS_NORMAL_SLOT 0 +#define RBUFS_LIGHT_SLOT 1 +#define RBUFS_DIFF_COLOR_SLOT 2 +#define RBUFS_SPEC_COLOR_SLOT 3 +#define RBUFS_EMISSION_SLOT 4 +#define RBUFS_AOV_COLOR_SLOT 5 +#define RBUFS_AOV_VALUE_SLOT 6 +#define RBUFS_CRYPTOMATTE_SLOT 7 + +/* Uniform Buffers. */ +/* Only during prepass. */ +#define VELOCITY_CAMERA_PREV_BUF 3 +#define VELOCITY_CAMERA_CURR_BUF 4 +#define VELOCITY_CAMERA_NEXT_BUF 5 + +/* Storage Buffers. */ +#define LIGHT_CULL_BUF_SLOT 0 +#define LIGHT_BUF_SLOT 1 +#define LIGHT_ZBIN_BUF_SLOT 2 +#define LIGHT_TILE_BUF_SLOT 3 +#define RBUFS_AOV_BUF_SLOT 5 +#define SAMPLING_BUF_SLOT 6 +#define CRYPTOMATTE_BUF_SLOT 7 + +/* Only during pre-pass. 
*/ +#define VELOCITY_OBJ_PREV_BUF_SLOT 0 +#define VELOCITY_OBJ_NEXT_BUF_SLOT 1 +#define VELOCITY_GEO_PREV_BUF_SLOT 2 +#define VELOCITY_GEO_NEXT_BUF_SLOT 3 +#define VELOCITY_INDIRECTION_BUF_SLOT 4 diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc new file mode 100644 index 00000000000..bc0891ceb92 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc @@ -0,0 +1,761 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#include "DRW_render.h" + +#include "BKE_camera.h" +#include "DNA_camera_types.h" + +#include "GPU_platform.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" + +#include "eevee_camera.hh" +#include "eevee_instance.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" + +#include "eevee_depth_of_field.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +void DepthOfField::init() +{ + const SceneEEVEE &sce_eevee = inst_.scene->eevee; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera = (camera_object_eval) ? 
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) : + nullptr; + if (camera == nullptr) { + /* Set to invalid value for update detection */ + data_.scatter_color_threshold = -1.0f; + return; + } + /* Reminder: These are parameters not interpolated by motion blur. */ + int update = 0; + int sce_flag = sce_eevee.flag; + update += assign_if_different(do_jitter_, (sce_flag & SCE_EEVEE_DOF_JITTER) != 0); + update += assign_if_different(user_overblur_, sce_eevee.bokeh_overblur / 100.0f); + update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size); + update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold); + update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max); + update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades)); + if (update > 0) { + inst_.sampling.reset(); + } +} + +void DepthOfField::sync() +{ + const Camera &camera = inst_.camera; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera_data = (camera_object_eval) ? 
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) : + nullptr; + + int update = 0; + + if (camera_data == nullptr || (camera_data->dof.flag & CAM_DOF_ENABLED) == 0) { + update += assign_if_different(jitter_radius_, 0.0f); + update += assign_if_different(fx_radius_, 0.0f); + if (update > 0) { + inst_.sampling.reset(); + } + return; + } + + float2 anisotropic_scale = {clamp_f(1.0f / camera_data->dof.aperture_ratio, 1e-5f, 1.0f), + clamp_f(camera_data->dof.aperture_ratio, 1e-5f, 1.0f)}; + update += assign_if_different(data_.bokeh_anisotropic_scale, anisotropic_scale); + update += assign_if_different(data_.bokeh_rotation, camera_data->dof.aperture_rotation); + update += assign_if_different(focus_distance_, + BKE_camera_object_dof_distance(camera_object_eval)); + data_.bokeh_anisotropic_scale_inv = 1.0f / data_.bokeh_anisotropic_scale; + + float fstop = max_ff(camera_data->dof.aperture_fstop, 1e-5f); + + if (update) { + inst_.sampling.reset(); + } + + float aperture = 1.0f / (2.0f * fstop); + if (camera.is_perspective()) { + aperture *= camera_data->lens * 1e-3f; + } + + if (camera.is_orthographic()) { + /* FIXME: Why is this needed? Some kind of implicit unit conversion? */ + aperture *= 0.04f; + /* Really strange behavior from Cycles but replicating. */ + focus_distance_ += camera.data_get().clip_near; + } + + if (camera.is_panoramic()) { + /* FIXME: Eyeballed. */ + aperture *= 0.185f; + } + + if (camera_data->dof.aperture_ratio < 1.0) { + /* If ratio is scaling the bokeh outwards, we scale the aperture so that + * the gather kernel size will encompass the maximum axis. */ + aperture /= max_ff(camera_data->dof.aperture_ratio, 1e-5f); + } + + float jitter_radius, fx_radius; + + /* Balance blur radius between fx dof and jitter dof. */ + if (do_jitter_ && (inst_.sampling.dof_ring_count_get() > 0) && !camera.is_panoramic() && + !inst_.is_viewport()) { + /* Compute a minimal overblur radius to fill the gaps between the samples. 
+ * This is just the simplified form of dividing the area of the bokeh by + * the number of samples. */ + float minimal_overblur = 1.0f / sqrtf(inst_.sampling.dof_sample_count_get()); + + fx_radius = (minimal_overblur + user_overblur_) * aperture; + /* Avoid dilating the shape. Over-blur only soften. */ + jitter_radius = max_ff(0.0f, aperture - fx_radius); + } + else { + jitter_radius = 0.0f; + fx_radius = aperture; + } + + /* Disable post fx if result wouldn't be noticeable. */ + if (fx_max_coc_ <= 0.5f) { + fx_radius = 0.0f; + } + + update += assign_if_different(jitter_radius_, jitter_radius); + update += assign_if_different(fx_radius_, fx_radius); + if (update > 0) { + inst_.sampling.reset(); + } + + if (fx_radius_ == 0.0f) { + return; + } + + /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */ + int2 max_render_res = inst_.film.render_extent_get(); + int2 half_res = math::divide_ceil(max_render_res, int2(2)); + int2 reduce_size = math::ceil_to_multiple(half_res, int2(DOF_REDUCE_GROUP_SIZE)); + + data_.gather_uv_fac = 1.0f / float2(reduce_size); + + /* Now that we know the maximum render resolution of every view, using depth of field, allocate + * the reduced buffers. Color needs to be signed format here. See note in shader for + * explanation. Do not use texture pool because of needs mipmaps. */ + reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT); + reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT); + reduced_color_tx_.ensure_mip_views(); + reduced_coc_tx_.ensure_mip_views(); + + /* Resize the scatter list to contain enough entry to cover half the screen with sprites (which + * is unlikely due to local contrast test). 
*/ + data_.scatter_max_rect = (reduced_color_tx_.pixel_count() / 4) / 2; + scatter_fg_list_buf_.resize(data_.scatter_max_rect); + scatter_bg_list_buf_.resize(data_.scatter_max_rect); + + bokeh_lut_pass_sync(); + setup_pass_sync(); + stabilize_pass_sync(); + downsample_pass_sync(); + reduce_pass_sync(); + tiles_flatten_pass_sync(); + tiles_dilate_pass_sync(); + gather_pass_sync(); + filter_pass_sync(); + scatter_pass_sync(); + hole_fill_pass_sync(); + resolve_pass_sync(); +} + +void DepthOfField::jitter_apply(float4x4 &winmat, float4x4 &viewmat) +{ + if (jitter_radius_ == 0.0f) { + return; + } + + float radius, theta; + inst_.sampling.dof_disk_sample_get(&radius, &theta); + + if (data_.bokeh_blades >= 3.0f) { + theta = circle_to_polygon_angle(data_.bokeh_blades, theta); + radius *= circle_to_polygon_radius(data_.bokeh_blades, theta); + } + radius *= jitter_radius_; + theta += data_.bokeh_rotation; + + /* Sample in View Space. */ + float2 sample = float2(radius * cosf(theta), radius * sinf(theta)); + sample *= data_.bokeh_anisotropic_scale; + /* Convert to NDC Space. */ + float3 jitter = float3(UNPACK2(sample), -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(winmat.ptr(), jitter); + mul_project_m4_v3(winmat.ptr(), center); + + const bool is_ortho = (winmat[2][3] != -1.0f); + if (is_ortho) { + sample *= focus_distance_; + } + /* Translate origin. */ + sub_v2_v2(viewmat[3], sample); + /* Skew winmat Z axis. */ + add_v2_v2(winmat[2], center - jitter); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Passes setup. + * \{ */ + +void DepthOfField::bokeh_lut_pass_sync() +{ + const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f); + if (!has_anisotropy && (data_.bokeh_blades == 0.0)) { + /* No need for LUTs in these cases. */ + use_bokeh_lut_ = false; + return; + } + use_bokeh_lut_ = true; + + /* Precompute bokeh texture. 
*/ + bokeh_lut_ps_.init(); + bokeh_lut_ps_.shader_set(inst_.shaders.static_shader_get(DOF_BOKEH_LUT)); + bokeh_lut_ps_.bind_ubo("dof_buf", data_); + bokeh_lut_ps_.bind_image("out_gather_lut_img", &bokeh_gather_lut_tx_); + bokeh_lut_ps_.bind_image("out_scatter_lut_img", &bokeh_scatter_lut_tx_); + bokeh_lut_ps_.bind_image("out_resolve_lut_img", &bokeh_resolve_lut_tx_); + bokeh_lut_ps_.dispatch(int3(1, 1, 1)); +} + +void DepthOfField::setup_pass_sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + setup_ps_.init(); + setup_ps_.shader_set(inst_.shaders.static_shader_get(DOF_SETUP)); + setup_ps_.bind_texture("color_tx", &input_color_tx_, no_filter); + setup_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + setup_ps_.bind_ubo("dof_buf", data_); + setup_ps_.bind_image("out_color_img", &setup_color_tx_); + setup_ps_.bind_image("out_coc_img", &setup_coc_tx_); + setup_ps_.dispatch(&dispatch_setup_size_); + setup_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::stabilize_pass_sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + VelocityModule &velocity = inst_.velocity; + + stabilize_ps_.init(); + stabilize_ps_.shader_set(inst_.shaders.static_shader_get(DOF_STABILIZE)); + stabilize_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS])); + stabilize_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT])); + /* This is only for temporal stability. The next step is not needed. 
*/ + stabilize_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[STEP_PREVIOUS])); + stabilize_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter); + stabilize_ps_.bind_texture("color_tx", &setup_color_tx_, no_filter); + stabilize_ps_.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter); + stabilize_ps_.bind_texture("in_history_tx", &stabilize_input_, with_filter); + stabilize_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + stabilize_ps_.bind_ubo("dof_buf", data_); + stabilize_ps_.push_constant("use_history", &stabilize_valid_history_, 1); + stabilize_ps_.bind_image("out_coc_img", reduced_coc_tx_.mip_view(0)); + stabilize_ps_.bind_image("out_color_img", reduced_color_tx_.mip_view(0)); + stabilize_ps_.bind_image("out_history_img", &stabilize_output_tx_); + stabilize_ps_.dispatch(&dispatch_stabilize_size_); + stabilize_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS); +} + +void DepthOfField::downsample_pass_sync() +{ + downsample_ps_.init(); + downsample_ps_.shader_set(inst_.shaders.static_shader_get(DOF_DOWNSAMPLE)); + downsample_ps_.bind_texture("color_tx", reduced_color_tx_.mip_view(0), no_filter); + downsample_ps_.bind_texture("coc_tx", reduced_coc_tx_.mip_view(0), no_filter); + downsample_ps_.bind_image("out_color_img", &downsample_tx_); + downsample_ps_.dispatch(&dispatch_downsample_size_); + downsample_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::reduce_pass_sync() +{ + reduce_ps_.init(); + reduce_ps_.shader_set(inst_.shaders.static_shader_get(DOF_REDUCE)); + reduce_ps_.bind_ubo("dof_buf", data_); + reduce_ps_.bind_texture("downsample_tx", &downsample_tx_, no_filter); + reduce_ps_.bind_ssbo("scatter_fg_list_buf", scatter_fg_list_buf_); + reduce_ps_.bind_ssbo("scatter_bg_list_buf", scatter_bg_list_buf_); + reduce_ps_.bind_ssbo("scatter_fg_indirect_buf", scatter_fg_indirect_buf_); + reduce_ps_.bind_ssbo("scatter_bg_indirect_buf", scatter_bg_indirect_buf_); + 
reduce_ps_.bind_image("inout_color_lod0_img", reduced_color_tx_.mip_view(0)); + reduce_ps_.bind_image("out_color_lod1_img", reduced_color_tx_.mip_view(1)); + reduce_ps_.bind_image("out_color_lod2_img", reduced_color_tx_.mip_view(2)); + reduce_ps_.bind_image("out_color_lod3_img", reduced_color_tx_.mip_view(3)); + reduce_ps_.bind_image("in_coc_lod0_img", reduced_coc_tx_.mip_view(0)); + reduce_ps_.bind_image("out_coc_lod1_img", reduced_coc_tx_.mip_view(1)); + reduce_ps_.bind_image("out_coc_lod2_img", reduced_coc_tx_.mip_view(2)); + reduce_ps_.bind_image("out_coc_lod3_img", reduced_coc_tx_.mip_view(3)); + reduce_ps_.dispatch(&dispatch_reduce_size_); + /* NOTE: Command buffer barrier is done automatically by the GPU backend. */ + reduce_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE); +} + +void DepthOfField::tiles_flatten_pass_sync() +{ + tiles_flatten_ps_.init(); + tiles_flatten_ps_.shader_set(inst_.shaders.static_shader_get(DOF_TILES_FLATTEN)); + /* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus + * flag from the setup pass. A better way would be to do the brute-force in focus gather without + * this. */ + tiles_flatten_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter); + tiles_flatten_ps_.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current()); + tiles_flatten_ps_.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current()); + tiles_flatten_ps_.dispatch(&dispatch_tiles_flatten_size_); + tiles_flatten_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); +} + +void DepthOfField::tiles_dilate_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_; + eShaderType sh_type = (pass == 0) ? 
DOF_TILES_DILATE_MINMAX : DOF_TILES_DILATE_MINABS; + drw_pass.init(); + drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type)); + drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.previous()); + drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.previous()); + drw_pass.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current()); + drw_pass.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current()); + drw_pass.push_constant("ring_count", &tiles_dilate_ring_count_, 1); + drw_pass.push_constant("ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1); + drw_pass.dispatch(&dispatch_tiles_dilate_size_); + drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + } +} + +void DepthOfField::gather_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? gather_fg_ps_ : gather_bg_ps_; + SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + eShaderType sh_type = (pass == 0) ? + (use_bokeh_lut_ ? DOF_GATHER_FOREGROUND_LUT : + DOF_GATHER_FOREGROUND) : + (use_bokeh_lut_ ? 
DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND); + drw_pass.init(); + inst_.sampling.bind_resources(&drw_pass); + drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type)); + drw_pass.bind_ubo("dof_buf", data_); + drw_pass.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear); + drw_pass.bind_texture("color_tx", reduced_color_tx_, gather_nearest); + drw_pass.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest); + drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + drw_pass.bind_image("out_color_img", &color_chain.current()); + drw_pass.bind_image("out_weight_img", &weight_chain.current()); + drw_pass.bind_image("out_occlusion_img", &occlusion_tx_); + drw_pass.bind_texture("bokeh_lut_tx", &bokeh_gather_lut_tx_); + drw_pass.dispatch(&dispatch_gather_size_); + drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::filter_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? filter_fg_ps_ : filter_bg_ps_; + SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + drw_pass.init(); + drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_FILTER)); + drw_pass.bind_texture("color_tx", &color_chain.previous()); + drw_pass.bind_texture("weight_tx", &weight_chain.previous()); + drw_pass.bind_image("out_color_img", &color_chain.current()); + drw_pass.bind_image("out_weight_img", &weight_chain.current()); + drw_pass.dispatch(&dispatch_filter_size_); + drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::scatter_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? 
scatter_fg_ps_ : scatter_bg_ps_; + drw_pass.init(); + drw_pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL); + drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_SCATTER)); + drw_pass.push_constant("use_bokeh_lut", use_bokeh_lut_); + drw_pass.bind_texture("bokeh_lut_tx", &bokeh_scatter_lut_tx_); + drw_pass.bind_texture("occlusion_tx", &occlusion_tx_); + if (pass == 0) { + drw_pass.bind_ssbo("scatter_list_buf", scatter_fg_list_buf_); + drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_fg_indirect_buf_); + /* Avoid background gather pass writing to the occlusion_tx mid pass. */ + drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + } + else { + drw_pass.bind_ssbo("scatter_list_buf", scatter_bg_list_buf_); + drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_bg_indirect_buf_); + } + } +} + +void DepthOfField::hole_fill_pass_sync() +{ + hole_fill_ps_.init(); + inst_.sampling.bind_resources(&hole_fill_ps_); + hole_fill_ps_.shader_set(inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL)); + hole_fill_ps_.bind_ubo("dof_buf", data_); + hole_fill_ps_.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear); + hole_fill_ps_.bind_texture("color_tx", reduced_color_tx_, gather_nearest); + hole_fill_ps_.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest); + hole_fill_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + hole_fill_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + hole_fill_ps_.bind_image("out_color_img", &hole_fill_color_tx_); + hole_fill_ps_.bind_image("out_weight_img", &hole_fill_weight_tx_); + hole_fill_ps_.dispatch(&dispatch_gather_size_); + hole_fill_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::resolve_pass_sync() +{ + eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; + RenderBuffers &render_buffers = inst_.render_buffers; + eShaderType sh_type = use_bokeh_lut_ ? 
DOF_RESOLVE_LUT : DOF_RESOLVE; + + resolve_ps_.init(); + inst_.sampling.bind_resources(&resolve_ps_); + resolve_ps_.shader_set(inst_.shaders.static_shader_get(sh_type)); + resolve_ps_.bind_ubo("dof_buf", data_); + resolve_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + resolve_ps_.bind_texture("color_tx", &input_color_tx_, no_filter); + resolve_ps_.bind_texture("stable_color_tx", &resolve_stable_color_tx_, no_filter); + resolve_ps_.bind_texture("color_bg_tx", &color_bg_tx_.current(), with_filter); + resolve_ps_.bind_texture("color_fg_tx", &color_fg_tx_.current(), with_filter); + resolve_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + resolve_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + resolve_ps_.bind_texture("weight_bg_tx", &weight_bg_tx_.current()); + resolve_ps_.bind_texture("weight_fg_tx", &weight_fg_tx_.current()); + resolve_ps_.bind_texture("color_hole_fill_tx", &hole_fill_color_tx_); + resolve_ps_.bind_texture("weight_hole_fill_tx", &hole_fill_weight_tx_); + resolve_ps_.bind_texture("bokeh_lut_tx", &bokeh_resolve_lut_tx_); + resolve_ps_.bind_image("out_color_img", &output_color_tx_); + resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); + resolve_ps_.dispatch(&dispatch_resolve_size_); + resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Post-FX Rendering. + * \{ */ + +/* Similar to Film::update_sample_table() but with constant filter radius and constant sample + * count. */ +void DepthOfField::update_sample_table() +{ + float2 subpixel_offset = inst_.film.pixel_jitter_get(); + /* Since the film jitter is in full-screen res, divide by 2 to get the jitter in half res. */ + subpixel_offset *= 0.5; + + /* Same offsets as in dof_spatial_filtering(). 
*/ + const std::array<int2, 4> plus_offsets = {int2(-1, 0), int2(0, -1), int2(1, 0), int2(0, 1)}; + + const float radius = 1.5f; + int i = 0; + for (int2 offset : plus_offsets) { + float2 pixel_ofs = float2(offset) - subpixel_offset; + data_.filter_samples_weight[i++] = film_filter_weight(radius, math::length_squared(pixel_ofs)); + } + data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset)); +} + +void DepthOfField::render(View &view, + GPUTexture **input_tx, + GPUTexture **output_tx, + DepthOfFieldBuffer &dof_buffer) +{ + if (fx_radius_ == 0.0f) { + return; + } + + input_color_tx_ = *input_tx; + output_color_tx_ = *output_tx; + extent_ = {GPU_texture_width(input_color_tx_), GPU_texture_height(input_color_tx_)}; + + { + const CameraData &cam_data = inst_.camera.data_get(); + data_.camera_type = cam_data.type; + /* OPTI(fclem) Could be optimized. */ + float3 jitter = float3(fx_radius_, 0.0f, -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(cam_data.winmat.ptr(), jitter); + mul_project_m4_v3(cam_data.winmat.ptr(), center); + /* Simplify CoC calculation to a simple MADD. */ + if (inst_.camera.is_orthographic()) { + data_.coc_mul = (center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_bias = focus_distance_ * data_.coc_mul; + } + else { + data_.coc_bias = -(center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_mul = focus_distance_ * data_.coc_bias; + } + + float min_fg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_near); + float max_bg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_far); + if (data_.camera_type != CAMERA_ORTHO) { + /* Background is at infinity so maximum CoC is the limit of coc_radius_from_camera_depth + * at -inf. We only do this for perspective camera since orthographic coc limit is inf. */ + max_bg_coc = data_.coc_bias; + } + /* Clamp with user defined max. 
*/ + data_.coc_abs_max = min_ff(max_ff(fabsf(min_fg_coc), fabsf(max_bg_coc)), fx_max_coc_); + /* TODO(fclem): Make this dependent of the quality of the gather pass. */ + data_.scatter_coc_threshold = 4.0f; + + update_sample_table(); + + data_.push_update(); + } + + int2 half_res = math::divide_ceil(extent_, int2(2)); + int2 quarter_res = math::divide_ceil(extent_, int2(4)); + int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE)); + + dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1); + dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_STABILIZE_GROUP_SIZE)), 1); + dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)), + 1); + dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1); + dispatch_tiles_flatten_size_ = int3(math::divide_ceil(half_res, int2(DOF_TILES_SIZE)), 1); + dispatch_tiles_dilate_size_ = int3( + math::divide_ceil(tile_res, int2(DOF_TILES_DILATE_GROUP_SIZE)), 1); + dispatch_gather_size_ = int3(math::divide_ceil(half_res, int2(DOF_GATHER_GROUP_SIZE)), 1); + dispatch_filter_size_ = int3(math::divide_ceil(half_res, int2(DOF_FILTER_GROUP_SIZE)), 1); + dispatch_resolve_size_ = int3(math::divide_ceil(extent_, int2(DOF_RESOLVE_GROUP_SIZE)), 1); + + if (GPU_type_matches_ex(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + /* On Mesa, there is a sync bug which can make a portion of the main pass (usually one shader) + * leave blocks of un-initialized memory. Doing a flush seems to alleviate the issue. 
*/ + GPU_flush(); + } + + DRW_stats_group_start("Depth of Field"); + + Manager &drw = *inst_.manager; + + { + DRW_stats_group_start("Setup"); + { + bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F); + bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F); + bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F); + + if (use_bokeh_lut_) { + drw.submit(bokeh_lut_ps_, view); + } + } + { + setup_color_tx_.acquire(half_res, GPU_RGBA16F); + setup_coc_tx_.acquire(half_res, GPU_R16F); + + drw.submit(setup_ps_, view); + } + { + stabilize_output_tx_.acquire(half_res, GPU_RGBA16F); + stabilize_valid_history_ = !dof_buffer.stabilize_history_tx_.ensure_2d(GPU_RGBA16F, + half_res); + + if (stabilize_valid_history_ == false) { + /* Avoid uninitialized memory that can contain NaNs. */ + dof_buffer.stabilize_history_tx_.clear(float4(0.0f)); + } + + stabilize_input_ = dof_buffer.stabilize_history_tx_; + /* Outputs to reduced_*_tx_ mip 0. */ + drw.submit(stabilize_ps_, view); + + /* WATCH(fclem): Swap Texture an TextureFromPool internal GPUTexture in order to reuse + * the one that we just consumed. */ + TextureFromPool::swap(stabilize_output_tx_, dof_buffer.stabilize_history_tx_); + + /* Used by stabilize pass. */ + stabilize_output_tx_.release(); + setup_color_tx_.release(); + } + { + DRW_stats_group_start("Tile Prepare"); + + /* WARNING: If format changes, make sure dof_tile_* GLSL constants are properly encoded. */ + tiles_fg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_bg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_fg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F); + + drw.submit(tiles_flatten_ps_, view); + + /* Used by tile_flatten and stabilize_ps pass. */ + setup_coc_tx_.release(); + + /* Error introduced by gather center jittering. 
*/ + const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f); + int dilation_end_radius = ceilf((fx_max_coc_ * error_multiplier) / (DOF_TILES_SIZE * 2)); + + /* Run dilation twice. One for minmax and one for minabs. */ + for (int pass = 0; pass < 2; pass++) { + /* This algorithm produce the exact dilation radius by dividing it in multiple passes. */ + int dilation_radius = 0; + while (dilation_radius < dilation_end_radius) { + int remainder = dilation_end_radius - dilation_radius; + /* Do not step over any unvisited tile. */ + int max_multiplier = dilation_radius + 1; + + int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier)); + int multiplier = min_ii(max_multiplier, floorf(remainder / (float)ring_count)); + + dilation_radius += ring_count * multiplier; + + tiles_dilate_ring_count_ = ring_count; + tiles_dilate_ring_width_mul_ = multiplier; + + tiles_fg_tx_.swap(); + tiles_bg_tx_.swap(); + + drw.submit((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_, view); + } + } + + tiles_fg_tx_.previous().release(); + tiles_bg_tx_.previous().release(); + + DRW_stats_group_end(); + } + + downsample_tx_.acquire(quarter_res, GPU_RGBA16F); + + drw.submit(downsample_ps_, view); + + scatter_fg_indirect_buf_.clear_to_zero(); + scatter_bg_indirect_buf_.clear_to_zero(); + + drw.submit(reduce_ps_, view); + + /* Used by reduce pass. */ + downsample_tx_.release(); + + DRW_stats_group_end(); + } + + for (int is_background = 0; is_background < 2; is_background++) { + DRW_stats_group_start(is_background ? "Background Convolution" : "Foreground Convolution"); + + SwapChain<TextureFromPool, 2> &color_tx = is_background ? color_bg_tx_ : color_fg_tx_; + SwapChain<TextureFromPool, 2> &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_; + Framebuffer &scatter_fb = is_background ? scatter_bg_fb_ : scatter_fg_fb_; + PassSimple &gather_ps = is_background ? 
gather_bg_ps_ : gather_fg_ps_; + PassSimple &filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_; + PassSimple &scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_; + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + occlusion_tx_.acquire(half_res, GPU_RG16F); + + drw.submit(gather_ps, view); + + { + /* Filtering pass. */ + color_tx.swap(); + weight_tx.swap(); + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + + drw.submit(filter_ps, view); + + color_tx.previous().release(); + weight_tx.previous().release(); + } + + GPU_memory_barrier(GPU_BARRIER_FRAMEBUFFER); + + scatter_fb.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current())); + + GPU_framebuffer_bind(scatter_fb); + drw.submit(scatter_ps, view); + + /* Used by scatter pass. */ + occlusion_tx_.release(); + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Hole Fill"); + + bokeh_gather_lut_tx_.release(); + bokeh_scatter_lut_tx_.release(); + + hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F); + hole_fill_weight_tx_.acquire(half_res, GPU_R16F); + + drw.submit(hole_fill_ps_, view); + + /* NOTE: We do not filter the hole-fill pass as effect is likely to not be noticeable. */ + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Resolve"); + + resolve_stable_color_tx_ = dof_buffer.stabilize_history_tx_; + + drw.submit(resolve_ps_, view); + + color_bg_tx_.current().release(); + color_fg_tx_.current().release(); + weight_bg_tx_.current().release(); + weight_fg_tx_.current().release(); + tiles_fg_tx_.current().release(); + tiles_bg_tx_.current().release(); + hole_fill_color_tx_.release(); + hole_fill_weight_tx_.release(); + bokeh_resolve_lut_tx_.release(); + + DRW_stats_group_end(); + } + + DRW_stats_group_end(); + + /* Swap buffers so that next effect has the right input. 
*/ + SWAP(GPUTexture *, *input_tx, *output_tx); +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh new file mode 100644 index 00000000000..bac0e394d66 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#pragma once + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +struct DepthOfFieldBuffer { + /** + * Per view history texture for stabilize pass. + * Swapped with stabilize_output_tx_ in order to reuse the previous history during DoF + * processing. + * Note this should be private as its inner working only concerns the Depth Of Field + * implementation. The view itself should not touch it. + */ + Texture stabilize_history_tx_ = {"dof_taa"}; +}; + +class DepthOfField { + private: + class Instance &inst_; + + /** Samplers */ + static constexpr eGPUSamplerState gather_bilinear = GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER; + static constexpr eGPUSamplerState gather_nearest = GPU_SAMPLER_MIPMAP; + + /** Input/Output texture references. 
*/ + GPUTexture *input_color_tx_ = nullptr; + GPUTexture *output_color_tx_ = nullptr; + + /** Bokeh LUT precompute pass. */ + TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"}; + TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"}; + TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"}; + PassSimple bokeh_lut_ps_ = {"BokehLut"}; + + /** Outputs half-resolution color and Circle Of Confusion. */ + TextureFromPool setup_coc_tx_ = {"dof_setup_coc"}; + TextureFromPool setup_color_tx_ = {"dof_setup_color"}; + int3 dispatch_setup_size_ = int3(-1); + PassSimple setup_ps_ = {"Setup"}; + + /** Allocated because we need mip chain. Which isn't supported by TextureFromPool. */ + Texture reduced_coc_tx_ = {"dof_reduced_coc"}; + Texture reduced_color_tx_ = {"dof_reduced_color"}; + + /** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */ + TextureFromPool stabilize_output_tx_ = {"dof_taa"}; + GPUTexture *stabilize_input_ = nullptr; + bool1 stabilize_valid_history_ = false; + int3 dispatch_stabilize_size_ = int3(-1); + PassSimple stabilize_ps_ = {"Stabilize"}; + + /** 1/4th res color buffer used to speedup the local contrast test in the first reduce pass. */ + TextureFromPool downsample_tx_ = {"dof_downsample"}; + int3 dispatch_downsample_size_ = int3(-1); + PassSimple downsample_ps_ = {"Downsample"}; + + /** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */ + DepthOfFieldScatterListBuf scatter_fg_list_buf_; + DepthOfFieldScatterListBuf scatter_bg_list_buf_; + DrawIndirectBuf scatter_fg_indirect_buf_; + DrawIndirectBuf scatter_bg_indirect_buf_; + int3 dispatch_reduce_size_ = int3(-1); + PassSimple reduce_ps_ = {"Reduce"}; + + /** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). 
*/ + SwapChain<TextureFromPool, 2> tiles_fg_tx_; + SwapChain<TextureFromPool, 2> tiles_bg_tx_; + int3 dispatch_tiles_flatten_size_ = int3(-1); + PassSimple tiles_flatten_ps_ = {"TilesFlatten"}; + + /** Dilates the min & max CoCs to cover maximum COC values. */ + int tiles_dilate_ring_count_ = -1; + int tiles_dilate_ring_width_mul_ = -1; + int3 dispatch_tiles_dilate_size_ = int3(-1); + PassSimple tiles_dilate_minmax_ps_ = {"TilesDilateMinmax"}; + PassSimple tiles_dilate_minabs_ps_ = {"TilesDilateMinabs"}; + + /** Gather convolution for low intensity pixels and low contrast areas. */ + SwapChain<TextureFromPool, 2> color_bg_tx_; + SwapChain<TextureFromPool, 2> color_fg_tx_; + SwapChain<TextureFromPool, 2> weight_bg_tx_; + SwapChain<TextureFromPool, 2> weight_fg_tx_; + TextureFromPool occlusion_tx_ = {"dof_occlusion"}; + int3 dispatch_gather_size_ = int3(-1); + PassSimple gather_fg_ps_ = {"GatherFg"}; + PassSimple gather_bg_ps_ = {"GatherBg"}; + + /** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */ + TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"}; + TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"}; + PassSimple hole_fill_ps_ = {"HoleFill"}; + + /** Small Filter pass to reduce noise out of gather passes. */ + int3 dispatch_filter_size_ = int3(-1); + PassSimple filter_fg_ps_ = {"FilterFg"}; + PassSimple filter_bg_ps_ = {"FilterBg"}; + + /** Scatter convolution: A quad is emitted for every 4 bright enough half pixels. */ + Framebuffer scatter_fg_fb_ = {"dof_scatter_fg"}; + Framebuffer scatter_bg_fb_ = {"dof_scatter_bg"}; + PassSimple scatter_fg_ps_ = {"ScatterFg"}; + PassSimple scatter_bg_ps_ = {"ScatterBg"}; + + /** Recombine the results and also perform a slight out of focus gather. */ + GPUTexture *resolve_stable_color_tx_ = nullptr; + int3 dispatch_resolve_size_ = int3(-1); + PassSimple resolve_ps_ = {"Resolve"}; + + DepthOfFieldDataBuf data_; + + /** Scene settings that are immutable. 
*/ + float user_overblur_; + float fx_max_coc_; + /** Use jittered depth of field where we randomize camera location. */ + bool do_jitter_; + /** Enable bokeh lookup texture. */ + bool use_bokeh_lut_; + + /** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */ + float fx_radius_; + /** Circle of Confusion radius for jittered DoF. Is in view X direction in [0..1] range. */ + float jitter_radius_; + /** Focus distance in view space. */ + float focus_distance_; + /** Extent of the input buffer. */ + int2 extent_; + + public: + DepthOfField(Instance &inst) : inst_(inst){}; + ~DepthOfField(){}; + + void init(); + + void sync(); + + /** + * Apply Depth Of Field jittering to the view and projection matrices.. + */ + void jitter_apply(float4x4 &winmat, float4x4 &viewmat); + + /** + * Will swap input and output texture if rendering happens. The actual output of this function + * is in input_tx. + */ + void render(View &view, + GPUTexture **input_tx, + GPUTexture **output_tx, + DepthOfFieldBuffer &dof_buffer); + + bool postfx_enabled() const + { + return fx_radius_ > 0.0f; + } + + private: + void bokeh_lut_pass_sync(); + void setup_pass_sync(); + void stabilize_pass_sync(); + void downsample_pass_sync(); + void reduce_pass_sync(); + void tiles_flatten_pass_sync(); + void tiles_dilate_pass_sync(); + void gather_pass_sync(); + void filter_pass_sync(); + void scatter_pass_sync(); + void hole_fill_pass_sync(); + void resolve_pass_sync(); + + void update_sample_table(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_engine.cc b/source/blender/draw/engines/eevee_next/eevee_engine.cc index be0adfad568..5ef198838c9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_engine.cc +++ b/source/blender/draw/engines/eevee_next/eevee_engine.cc @@ -12,6 +12,8 @@ #include "DRW_render.h" +#include "RE_pipeline.h" + #include "eevee_engine.h" /* Own include. 
*/ #include "eevee_instance.hh" @@ -97,6 +99,8 @@ static void eevee_draw_scene(void *vedata) DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); ved->instance->draw_viewport(dfbl); STRNCPY(ved->info, ved->instance->info.c_str()); + /* Reset view for other following engines. */ + DRW_view_set_active(nullptr); } static void eevee_cache_init(void *vedata) @@ -136,7 +140,7 @@ static void eevee_instance_free(void *instance) delete reinterpret_cast<eevee::Instance *>(instance); } -static void eevee_render_to_image(void *UNUSED(vedata), +static void eevee_render_to_image(void *vedata, struct RenderEngine *engine, struct RenderLayer *layer, const struct rcti *UNUSED(rect)) @@ -144,7 +148,39 @@ static void eevee_render_to_image(void *UNUSED(vedata), if (!GPU_shader_storage_buffer_objects_support()) { return; } - UNUSED_VARS(engine, layer); + + eevee::Instance *instance = new eevee::Instance(); + + Render *render = engine->re; + Depsgraph *depsgraph = DRW_context_state_get()->depsgraph; + Object *camera_original_ob = RE_GetCamera(engine->re); + const char *viewname = RE_GetActiveRenderView(engine->re); + int size[2] = {engine->resolution_x, engine->resolution_y}; + + rctf view_rect; + rcti rect; + RE_GetViewPlane(render, &view_rect, &rect); + + instance->init(size, &rect, engine, depsgraph, nullptr, camera_original_ob, layer); + instance->render_frame(layer, viewname); + + EEVEE_Data *ved = static_cast<EEVEE_Data *>(vedata); + if (ved->instance) { + delete ved->instance; + } + ved->instance = instance; +} + +static void eevee_store_metadata(void *vedata, struct RenderResult *render_result) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + EEVEE_Data *ved = static_cast<EEVEE_Data *>(vedata); + eevee::Instance *instance = ved->instance; + instance->store_metadata(render_result); + delete instance; + ved->instance = nullptr; } static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) @@ -152,7 +188,7 
@@ static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewL if (!GPU_shader_storage_buffer_objects_support()) { return; } - UNUSED_VARS(engine, scene, view_layer); + eevee::Instance::update_passes(engine, scene, view_layer); } static const DrawEngineDataSize eevee_data_size = DRW_VIEWPORT_DATA_SIZE(EEVEE_Data); @@ -174,7 +210,7 @@ DrawEngineType draw_engine_eevee_next_type = { nullptr, nullptr, &eevee_render_to_image, - nullptr, + &eevee_store_metadata, }; RenderEngineType DRW_engine_viewport_eevee_next_type = { diff --git a/source/blender/draw/engines/eevee_next/eevee_film.cc b/source/blender/draw/engines/eevee_next/eevee_film.cc new file mode 100644 index 00000000000..b89746d99e2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_film.cc @@ -0,0 +1,727 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * A film is a fullscreen buffer (usually at output extent) + * that will be able to accumulate sample in any distorted camera_type + * using a pixel filter. + * + * Input needs to be jittered so that the filter converges to the right result. + */ + +#include "BLI_hash.h" +#include "BLI_rect.h" + +#include "GPU_framebuffer.h" +#include "GPU_texture.h" + +#include "DRW_render.h" +#include "RE_pipeline.h" + +#include "eevee_film.hh" +#include "eevee_instance.hh" + +namespace blender::eevee { + +ENUM_OPERATORS(eViewLayerEEVEEPassType, 1 << EEVEE_RENDER_PASS_MAX_BIT) + +/* -------------------------------------------------------------------- */ +/** \name Arbitrary Output Variables + * \{ */ + +void Film::init_aovs() +{ + Vector<ViewLayerAOV *> aovs; + + aovs_info.display_id = -1; + aovs_info.display_is_value = false; + aovs_info.value_len = aovs_info.color_len = 0; + + if (inst_.is_viewport()) { + /* Viewport case. */ + if (inst_.v3d->shading.render_pass == EEVEE_RENDER_PASS_AOV) { + /* AOV display, request only a single AOV. 
*/ + ViewLayerAOV *aov = (ViewLayerAOV *)BLI_findstring( + &inst_.view_layer->aovs, inst_.v3d->shading.aov_name, offsetof(ViewLayerAOV, name)); + + if (aov == nullptr) { + /* AOV not found in view layer. */ + return; + } + + aovs.append(aov); + aovs_info.display_id = 0; + aovs_info.display_is_value = (aov->type == AOV_TYPE_VALUE); + } + else { + /* TODO(fclem): The realtime compositor could ask for several AOVs. */ + } + } + else { + /* Render case. */ + LISTBASE_FOREACH (ViewLayerAOV *, aov, &inst_.view_layer->aovs) { + aovs.append(aov); + } + } + + if (aovs.size() > AOV_MAX) { + inst_.info = "Error: Too many AOVs"; + return; + } + + for (ViewLayerAOV *aov : aovs) { + bool is_value = (aov->type == AOV_TYPE_VALUE); + uint &index = is_value ? aovs_info.value_len : aovs_info.color_len; + uint &hash = is_value ? aovs_info.hash_value[index] : aovs_info.hash_color[index]; + hash = BLI_hash_string(aov->name); + index++; + } +} + +float *Film::read_aov(ViewLayerAOV *aov) +{ + bool is_value = (aov->type == AOV_TYPE_VALUE); + Texture &accum_tx = is_value ? value_accum_tx_ : color_accum_tx_; + + Span<uint> aovs_hash(is_value ? aovs_info.hash_value : aovs_info.hash_color, + is_value ? aovs_info.value_len : aovs_info.color_len); + /* Find AOV index. */ + uint hash = BLI_hash_string(aov->name); + int aov_index = -1; + int i = 0; + for (uint candidate_hash : aovs_hash) { + if (candidate_hash == hash) { + aov_index = i; + break; + } + i++; + } + + accum_tx.ensure_layer_views(); + + int index = aov_index + (is_value ? data_.aov_value_id : data_.aov_color_id); + GPUTexture *pass_tx = accum_tx.layer_view(index); + + return (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Mist Pass + * \{ */ + +void Film::sync_mist() +{ + const CameraData &cam = inst_.camera.data_get(); + const ::World *world = inst_.scene->world; + float mist_start = world ? 
world->miststa : cam.clip_near; + float mist_distance = world ? world->mistdist : fabsf(cam.clip_far - cam.clip_near); + int mist_type = world ? world->mistype : (int)WO_MIST_LINEAR; + + switch (mist_type) { + case WO_MIST_QUADRATIC: + data_.mist_exponent = 2.0f; + break; + case WO_MIST_LINEAR: + data_.mist_exponent = 1.0f; + break; + case WO_MIST_INVERSE_QUADRATIC: + data_.mist_exponent = 0.5f; + break; + } + + data_.mist_scale = 1.0 / mist_distance; + data_.mist_bias = -mist_start / mist_distance; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name FilmData + * \{ */ + +inline bool operator==(const FilmData &a, const FilmData &b) +{ + return (a.extent == b.extent) && (a.offset == b.offset) && + (a.filter_radius == b.filter_radius) && (a.scaling_factor == b.scaling_factor) && + (a.background_opacity == b.background_opacity); +} + +inline bool operator!=(const FilmData &a, const FilmData &b) +{ + return !(a == b); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +static eViewLayerEEVEEPassType enabled_passes(const ViewLayer *view_layer) +{ + eViewLayerEEVEEPassType result = eViewLayerEEVEEPassType(view_layer->eevee.render_passes); + +#define ENABLE_FROM_LEGACY(name_legacy, name_eevee) \ + SET_FLAG_FROM_TEST(result, \ + (view_layer->passflag & SCE_PASS_##name_legacy) != 0, \ + EEVEE_RENDER_PASS_##name_eevee); + + ENABLE_FROM_LEGACY(COMBINED, COMBINED) + ENABLE_FROM_LEGACY(Z, Z) + ENABLE_FROM_LEGACY(MIST, MIST) + ENABLE_FROM_LEGACY(NORMAL, NORMAL) + ENABLE_FROM_LEGACY(SHADOW, SHADOW) + ENABLE_FROM_LEGACY(AO, AO) + ENABLE_FROM_LEGACY(EMIT, EMIT) + ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT) + ENABLE_FROM_LEGACY(DIFFUSE_COLOR, DIFFUSE_COLOR) + ENABLE_FROM_LEGACY(GLOSSY_COLOR, SPECULAR_COLOR) + ENABLE_FROM_LEGACY(DIFFUSE_DIRECT, DIFFUSE_LIGHT) + ENABLE_FROM_LEGACY(GLOSSY_DIRECT, SPECULAR_LIGHT) + ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT) + 
ENABLE_FROM_LEGACY(VECTOR, VECTOR) + +#undef ENABLE_FROM_LEGACY + + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_OBJECT, + EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_ASSET, + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_MATERIAL, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); + + return result; +} + +void Film::init(const int2 &extent, const rcti *output_rect) +{ + Sampling &sampling = inst_.sampling; + Scene &scene = *inst_.scene; + SceneEEVEE &scene_eevee = scene.eevee; + + init_aovs(); + + { + /* Enable passes that need to be rendered. */ + eViewLayerEEVEEPassType render_passes = eViewLayerEEVEEPassType(0); + + if (inst_.is_viewport()) { + /* Viewport Case. */ + render_passes = eViewLayerEEVEEPassType(inst_.v3d->shading.render_pass); + + if (inst_.overlays_enabled() || inst_.gpencil_engine_enabled) { + /* Overlays and Grease Pencil needs the depth for correct compositing. + * Using the render pass ensure we store the center depth. */ + render_passes |= EEVEE_RENDER_PASS_Z; + } + } + else { + /* Render Case. */ + render_passes = enabled_passes(inst_.view_layer); + } + + /* Filter obsolete passes. */ + render_passes &= ~(EEVEE_RENDER_PASS_UNUSED_8 | EEVEE_RENDER_PASS_BLOOM); + + if (scene_eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) { + /* Disable motion vector pass if motion blur is enabled. */ + render_passes &= ~EEVEE_RENDER_PASS_VECTOR; + } + + /* TODO(@fclem): Can't we rely on depsgraph update notification? 
*/ + if (assign_if_different(enabled_passes_, render_passes)) { + sampling.reset(); + } + } + { + rcti fallback_rect; + if (BLI_rcti_is_empty(output_rect)) { + BLI_rcti_init(&fallback_rect, 0, extent[0], 0, extent[1]); + output_rect = &fallback_rect; + } + + FilmData data = data_; + data.extent = int2(BLI_rcti_size_x(output_rect), BLI_rcti_size_y(output_rect)); + data.offset = int2(output_rect->xmin, output_rect->ymin); + data.extent_inv = 1.0f / float2(data.extent); + /* Disable filtering if sample count is 1. */ + data.filter_radius = (sampling.sample_count() == 1) ? 0.0f : + clamp_f(scene.r.gauss, 0.0f, 100.0f); + /* TODO(fclem): parameter hidden in experimental. + * We need to figure out LOD bias first in order to preserve texture crispiness. */ + data.scaling_factor = 1; + data.cryptomatte_samples_len = inst_.view_layer->cryptomatte_levels; + + data.background_opacity = (scene.r.alphamode == R_ALPHAPREMUL) ? 0.0f : 1.0f; + if (inst_.is_viewport() && false /* TODO(fclem): StudioLight */) { + data.background_opacity = inst_.v3d->shading.studiolight_background; + } + + FilmData &data_prev_ = data_; + if (assign_if_different(data_prev_, data)) { + sampling.reset(); + } + + const eViewLayerEEVEEPassType data_passes = EEVEE_RENDER_PASS_Z | EEVEE_RENDER_PASS_NORMAL | + EEVEE_RENDER_PASS_VECTOR; + const eViewLayerEEVEEPassType color_passes_1 = EEVEE_RENDER_PASS_DIFFUSE_LIGHT | + EEVEE_RENDER_PASS_SPECULAR_LIGHT | + EEVEE_RENDER_PASS_VOLUME_LIGHT | + EEVEE_RENDER_PASS_EMIT; + const eViewLayerEEVEEPassType color_passes_2 = EEVEE_RENDER_PASS_DIFFUSE_COLOR | + EEVEE_RENDER_PASS_SPECULAR_COLOR | + EEVEE_RENDER_PASS_ENVIRONMENT | + EEVEE_RENDER_PASS_MIST | + EEVEE_RENDER_PASS_SHADOW | EEVEE_RENDER_PASS_AO; + + data_.exposure_scale = pow2f(scene.view_settings.exposure); + data_.has_data = (enabled_passes_ & data_passes) != 0; + data_.any_render_pass_1 = (enabled_passes_ & color_passes_1) != 0; + data_.any_render_pass_2 = (enabled_passes_ & color_passes_2) != 0; + } + { + /* 
Set pass offsets. */ + + data_.display_id = aovs_info.display_id; + data_.display_storage_type = aovs_info.display_is_value ? PASS_STORAGE_VALUE : + PASS_STORAGE_COLOR; + + /* Combined is in a separate buffer. */ + data_.combined_id = (enabled_passes_ & EEVEE_RENDER_PASS_COMBINED) ? 0 : -1; + /* Depth is in a separate buffer. */ + data_.depth_id = (enabled_passes_ & EEVEE_RENDER_PASS_Z) ? 0 : -1; + + data_.color_len = 0; + data_.value_len = 0; + + auto pass_index_get = [&](eViewLayerEEVEEPassType pass_type) { + ePassStorageType storage_type = pass_storage_type(pass_type); + int index = (enabled_passes_ & pass_type) ? + (storage_type == PASS_STORAGE_VALUE ? data_.value_len : data_.color_len)++ : + -1; + if (inst_.is_viewport() && inst_.v3d->shading.render_pass == pass_type) { + data_.display_id = index; + data_.display_storage_type = storage_type; + } + return index; + }; + + data_.mist_id = pass_index_get(EEVEE_RENDER_PASS_MIST); + data_.normal_id = pass_index_get(EEVEE_RENDER_PASS_NORMAL); + data_.vector_id = pass_index_get(EEVEE_RENDER_PASS_VECTOR); + data_.diffuse_light_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_LIGHT); + data_.diffuse_color_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_COLOR); + data_.specular_light_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_LIGHT); + data_.specular_color_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_COLOR); + data_.volume_light_id = pass_index_get(EEVEE_RENDER_PASS_VOLUME_LIGHT); + data_.emission_id = pass_index_get(EEVEE_RENDER_PASS_EMIT); + data_.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT); + data_.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW); + data_.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO); + + data_.aov_color_id = data_.color_len; + data_.aov_value_id = data_.value_len; + + data_.aov_color_len = aovs_info.color_len; + data_.aov_value_len = aovs_info.value_len; + + data_.color_len += data_.aov_color_len; + data_.value_len += data_.aov_value_len; + + int cryptomatte_id = 
0; + auto cryptomatte_index_get = [&](eViewLayerEEVEEPassType pass_type) { + int index = -1; + if (enabled_passes_ & pass_type) { + index = cryptomatte_id; + cryptomatte_id += data_.cryptomatte_samples_len / 2; + + if (inst_.is_viewport() && inst_.v3d->shading.render_pass == pass_type) { + data_.display_id = index; + data_.display_storage_type = PASS_STORAGE_CRYPTOMATTE; + } + } + return index; + }; + data_.cryptomatte_object_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + data_.cryptomatte_asset_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + data_.cryptomatte_material_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); + } + { + /* TODO(@fclem): Over-scans. */ + + data_.render_extent = math::divide_ceil(extent, int2(data_.scaling_factor)); + int2 weight_extent = inst_.camera.is_panoramic() ? data_.extent : int2(data_.scaling_factor); + + eGPUTextureFormat color_format = GPU_RGBA16F; + eGPUTextureFormat float_format = GPU_R16F; + eGPUTextureFormat weight_format = GPU_R32F; + eGPUTextureFormat depth_format = GPU_R32F; + eGPUTextureFormat cryptomatte_format = GPU_RGBA32F; + + int reset = 0; + reset += depth_tx_.ensure_2d(depth_format, data_.extent); + reset += combined_tx_.current().ensure_2d(color_format, data_.extent); + reset += combined_tx_.next().ensure_2d(color_format, data_.extent); + /* Two layers, one for nearest sample weight and one for weight accumulation. */ + reset += weight_tx_.current().ensure_2d_array(weight_format, weight_extent, 2); + reset += weight_tx_.next().ensure_2d_array(weight_format, weight_extent, 2); + reset += color_accum_tx_.ensure_2d_array(color_format, + (data_.color_len > 0) ? data_.extent : int2(1), + (data_.color_len > 0) ? data_.color_len : 1); + reset += value_accum_tx_.ensure_2d_array(float_format, + (data_.value_len > 0) ? data_.extent : int2(1), + (data_.value_len > 0) ? data_.value_len : 1); + /* Divided by two as two cryptomatte samples fit in pixel (RG, BA). 
*/ + int cryptomatte_array_len = cryptomatte_layer_len_get() * data_.cryptomatte_samples_len / 2; + reset += cryptomatte_tx_.ensure_2d_array(cryptomatte_format, + (cryptomatte_array_len > 0) ? data_.extent : int2(1), + (cryptomatte_array_len > 0) ? cryptomatte_array_len : + 1); + + if (reset > 0) { + sampling.reset(); + data_.use_history = 0; + data_.use_reprojection = 0; + + /* Avoid NaN in uninitialized texture memory making history blending dangerous. */ + color_accum_tx_.clear(float4(0.0f)); + value_accum_tx_.clear(float4(0.0f)); + combined_tx_.current().clear(float4(0.0f)); + weight_tx_.current().clear(float4(0.0f)); + depth_tx_.clear(float4(0.0f)); + cryptomatte_tx_.clear(float4(0.0f)); + } + } + + force_disable_reprojection_ = (scene_eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0; +} + +void Film::sync() +{ + /* We use a fragment shader for viewport because we need to output the depth. */ + bool use_compute = (inst_.is_viewport() == false); + + eShaderType shader = use_compute ? FILM_COMP : FILM_FRAG; + + /* TODO(fclem): Shader variation for panoramic & scaled resolution. */ + + RenderBuffers &rbuffers = inst_.render_buffers; + VelocityModule &velocity = inst_.velocity; + + eGPUSamplerState filter = GPU_SAMPLER_FILTER; + + /* For viewport, only previous motion is supported. + * Still bind previous step to avoid undefined behavior. */ + eVelocityStep step_next = inst_.is_viewport() ? 
STEP_PREVIOUS : STEP_NEXT; + + accumulate_ps_.init(); + accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS); + accumulate_ps_.shader_set(inst_.shaders.static_shader_get(shader)); + accumulate_ps_.bind_ubo("film_buf", &data_); + accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS])); + accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT])); + accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next])); + accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx); + accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_); + accumulate_ps_.bind_texture("normal_tx", &rbuffers.normal_tx); + accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx); + accumulate_ps_.bind_texture("light_tx", &rbuffers.light_tx); + accumulate_ps_.bind_texture("diffuse_color_tx", &rbuffers.diffuse_color_tx); + accumulate_ps_.bind_texture("specular_color_tx", &rbuffers.specular_color_tx); + accumulate_ps_.bind_texture("volume_light_tx", &rbuffers.volume_light_tx); + accumulate_ps_.bind_texture("emission_tx", &rbuffers.emission_tx); + accumulate_ps_.bind_texture("environment_tx", &rbuffers.environment_tx); + accumulate_ps_.bind_texture("shadow_tx", &rbuffers.shadow_tx); + accumulate_ps_.bind_texture("ambient_occlusion_tx", &rbuffers.ambient_occlusion_tx); + accumulate_ps_.bind_texture("aov_color_tx", &rbuffers.aov_color_tx); + accumulate_ps_.bind_texture("aov_value_tx", &rbuffers.aov_value_tx); + accumulate_ps_.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx); + /* NOTE(@fclem): 16 is the max number of sampled texture in many implementations. + * If we need more, we need to pack more of the similar passes in the same textures as arrays or + * use image binding instead. 
*/ + accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current()); + accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next()); + accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter); + accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next()); + accumulate_ps_.bind_image("depth_img", &depth_tx_); + accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_); + accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_); + accumulate_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_); + /* Sync with rendering passes. */ + accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS); + if (use_compute) { + accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1)); + } + else { + accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } + + const int cryptomatte_layer_count = cryptomatte_layer_len_get(); + const bool is_cryptomatte_pass_enabled = cryptomatte_layer_count > 0; + const bool do_cryptomatte_sorting = inst_.is_viewport() == false; + cryptomatte_post_ps_.init(); + if (is_cryptomatte_pass_enabled && do_cryptomatte_sorting) { + cryptomatte_post_ps_.state_set(DRW_STATE_NO_DRAW); + cryptomatte_post_ps_.shader_set(inst_.shaders.static_shader_get(FILM_CRYPTOMATTE_POST)); + cryptomatte_post_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_); + cryptomatte_post_ps_.bind_image("weight_img", &weight_tx_.current()); + cryptomatte_post_ps_.push_constant("cryptomatte_layer_len", cryptomatte_layer_count); + cryptomatte_post_ps_.push_constant("cryptomatte_samples_per_layer", + inst_.view_layer->cryptomatte_levels); + int2 dispatch_size = math::divide_ceil(int2(cryptomatte_tx_.size()), int2(FILM_GROUP_SIZE)); + cryptomatte_post_ps_.dispatch(int3(UNPACK2(dispatch_size), 1)); + } +} + +void Film::end_sync() +{ + data_.use_reprojection = inst_.sampling.interactive_mode(); + + /* Just bypass the reprojection and reset the accumulation. 
*/ + if (force_disable_reprojection_ && inst_.sampling.is_reset()) { + data_.use_reprojection = false; + data_.use_history = false; + } + + aovs_info.push_update(); + + sync_mist(); +} + +float2 Film::pixel_jitter_get() const +{ + float2 jitter = inst_.sampling.rng_2d_get(SAMPLING_FILTER_U); + + if (!use_box_filter && data_.filter_radius < M_SQRT1_2 && !inst_.camera.is_panoramic()) { + /* For filter size less than a pixel, change sampling strategy and use a uniform disk + * distribution covering the filter shape. This avoids putting samples in areas without any + * weights. */ + /* TODO(fclem): Importance sampling could be a better option here. */ + jitter = Sampling::sample_disk(jitter) * data_.filter_radius; + } + else { + /* Jitter the size of a whole pixel. [-0.5..0.5] */ + jitter -= 0.5f; + } + /* TODO(fclem): Mixed-resolution rendering: We need to offset to each of the target pixel covered + * by a render pixel, ideally, by choosing one randomly using another sampling dimension, or by + * repeating the same sample RNG sequence for each pixel offset. */ + return jitter; +} + +eViewLayerEEVEEPassType Film::enabled_passes_get() const +{ + if (inst_.is_viewport() && data_.use_reprojection) { + /* Enable motion vector rendering but not the accumulation buffer. */ + return enabled_passes_ | EEVEE_RENDER_PASS_VECTOR; + } + return enabled_passes_; +} + +int Film::cryptomatte_layer_len_get() const +{ + int result = 0; + result += data_.cryptomatte_object_id == -1 ? 0 : 1; + result += data_.cryptomatte_asset_id == -1 ? 0 : 1; + result += data_.cryptomatte_material_id == -1 ? 
0 : 1; + return result; +} + +int Film::cryptomatte_layer_max_get() const +{ + if (data_.cryptomatte_material_id != -1) { + return 3; + } + if (data_.cryptomatte_asset_id != -1) { + return 2; + } + if (data_.cryptomatte_object_id != -1) { + return 1; + } + return 0; +} + +void Film::update_sample_table() +{ + data_.subpixel_offset = pixel_jitter_get(); + + int filter_radius_ceil = ceilf(data_.filter_radius); + float filter_radius_sqr = square_f(data_.filter_radius); + + data_.samples_len = 0; + if (use_box_filter || data_.filter_radius < 0.01f) { + /* Disable gather filtering. */ + data_.samples[0].texel = int2(0, 0); + data_.samples[0].weight = 1.0f; + data_.samples_weight_total = 1.0f; + data_.samples_len = 1; + } + /* NOTE: Threshold determined by hand until we don't hit the assert below. */ + else if (data_.filter_radius < 2.20f) { + /* Small filter Size. */ + int closest_index = 0; + float closest_distance = FLT_MAX; + data_.samples_weight_total = 0.0f; + /* TODO(fclem): For optimization, could try Z-tile ordering. */ + for (int y = -filter_radius_ceil; y <= filter_radius_ceil; y++) { + for (int x = -filter_radius_ceil; x <= filter_radius_ceil; x++) { + float2 pixel_offset = float2(x, y) - data_.subpixel_offset; + float distance_sqr = math::length_squared(pixel_offset); + if (distance_sqr < filter_radius_sqr) { + if (data_.samples_len >= FILM_PRECOMP_SAMPLE_MAX) { + BLI_assert_msg(0, "Precomputed sample table is too small."); + break; + } + FilmSample &sample = data_.samples[data_.samples_len]; + sample.texel = int2(x, y); + sample.weight = film_filter_weight(data_.filter_radius, distance_sqr); + data_.samples_weight_total += sample.weight; + + if (distance_sqr < closest_distance) { + closest_distance = distance_sqr; + closest_index = data_.samples_len; + } + data_.samples_len++; + } + } + } + /* Put the closest one in first position. 
*/ + if (closest_index != 0) { + SWAP(FilmSample, data_.samples[closest_index], data_.samples[0]); + } + } + else { + /* Large Filter Size. */ + MutableSpan<FilmSample> sample_table(data_.samples, FILM_PRECOMP_SAMPLE_MAX); + /* To avoid hitting driver TDR and slowing rendering too much we use random sampling. */ + /* TODO(fclem): This case needs more work. We could distribute the samples better to avoid + * loading the same pixel twice. */ + data_.samples_len = sample_table.size(); + data_.samples_weight_total = 0.0f; + + int i = 0; + for (FilmSample &sample : sample_table) { + /* TODO(fclem): Own RNG. */ + float2 random_2d = inst_.sampling.rng_2d_get(SAMPLING_SSS_U); + /* This randomization makes sure we converge to the right result but also makes nearest + * neighbor filtering not converging rapidly. */ + random_2d.x = (random_2d.x + i) / float(FILM_PRECOMP_SAMPLE_MAX); + + float2 pixel_offset = math::floor(Sampling::sample_spiral(random_2d) * data_.filter_radius); + sample.texel = int2(pixel_offset); + + float distance_sqr = math::length_squared(pixel_offset - data_.subpixel_offset); + sample.weight = film_filter_weight(data_.filter_radius, distance_sqr); + data_.samples_weight_total += sample.weight; + i++; + } + } +} + +void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx) +{ + if (inst_.is_viewport()) { + DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); + DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); + GPU_framebuffer_bind(dfbl->default_fb); + /* Clear when using render borders. 
*/ + if (data_.extent != int2(GPU_texture_width(dtxl->color), GPU_texture_height(dtxl->color))) { + float4 clear_color = {0.0f, 0.0f, 0.0f, 0.0f}; + GPU_framebuffer_clear_color(dfbl->default_fb, clear_color); + } + GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent)); + } + + update_sample_table(); + + combined_final_tx_ = combined_final_tx; + + data_.display_only = false; + data_.push_update(); + + draw::View drw_view("MainView", view); + + DRW_manager_get()->submit(accumulate_ps_, drw_view); + + combined_tx_.swap(); + weight_tx_.swap(); + + /* Use history after first sample. */ + if (data_.use_history == 0) { + data_.use_history = 1; + } +} + +void Film::display() +{ + BLI_assert(inst_.is_viewport()); + + /* Acquire dummy render buffers for correct binding. They will not be used. */ + inst_.render_buffers.acquire(int2(1)); + + DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); + GPU_framebuffer_bind(dfbl->default_fb); + GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent)); + + combined_final_tx_ = inst_.render_buffers.combined_tx; + + data_.display_only = true; + data_.push_update(); + + draw::View drw_view("MainView", DRW_view_default_get()); + + DRW_manager_get()->submit(accumulate_ps_, drw_view); + + inst_.render_buffers.release(); + + /* IMPORTANT: Do not swap! No accumulation has happened. */ +} + +void Film::cryptomatte_sort() +{ + DRW_manager_get()->submit(cryptomatte_post_ps_); +} + +float *Film::read_pass(eViewLayerEEVEEPassType pass_type, int layer_offset) +{ + ePassStorageType storage_type = pass_storage_type(pass_type); + const bool is_value = storage_type == PASS_STORAGE_VALUE; + const bool is_cryptomatte = storage_type == PASS_STORAGE_CRYPTOMATTE; + + Texture &accum_tx = (pass_type == EEVEE_RENDER_PASS_COMBINED) ? + combined_tx_.current() : + (pass_type == EEVEE_RENDER_PASS_Z) ? + depth_tx_ : + (is_cryptomatte ? cryptomatte_tx_ : + (is_value ? 
value_accum_tx_ : color_accum_tx_)); + + accum_tx.ensure_layer_views(); + + int index = pass_id_get(pass_type); + GPUTexture *pass_tx = accum_tx.layer_view(index + layer_offset); + + GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE); + + float *result = (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0); + + if (pass_is_float3(pass_type)) { + /* Convert result in place as we cannot do this conversion on GPU. */ + for (auto px : IndexRange(accum_tx.width() * accum_tx.height())) { + *(reinterpret_cast<float3 *>(result) + px) = *(reinterpret_cast<float3 *>(result + px * 4)); + } + } + + return result; +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_film.hh b/source/blender/draw/engines/eevee_next/eevee_film.hh new file mode 100644 index 00000000000..5478c20aff2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_film.hh @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The film class handles accumulation of samples with any distorted camera_type + * using a pixel filter. Inputs needs to be jittered so that the filter converges to the right + * result. + * + * In viewport, we switch between 2 accumulation mode depending on the scene state. + * - For static scene, we use a classic weighted accumulation. + * - For dynamic scene (if an update is detected), we use a more temporally stable accumulation + * following the Temporal Anti-Aliasing method (a.k.a. Temporal Super-Sampling). This does + * history reprojection and rectification to avoid most of the flickering. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +class Film { + public: + /** Stores indirection table of AOVs based on their name hash and their type. 
*/ + AOVsInfoDataBuf aovs_info; + /** For debugging purpose but could be a user option in the future. */ + static constexpr bool use_box_filter = false; + + private: + Instance &inst_; + + /** Incoming combined buffer with post FX applied (motion blur + depth of field). */ + GPUTexture *combined_final_tx_ = nullptr; + + /** + * Main accumulation textures containing every render-pass except depth, cryptomatte and + * combined. + */ + Texture color_accum_tx_; + Texture value_accum_tx_; + /** Depth accumulation texture. Separated because using a different format. */ + Texture depth_tx_; + /** Cryptomatte texture. Separated because it requires full floats. */ + Texture cryptomatte_tx_; + /** Combined "Color" buffer. Double buffered to allow re-projection. */ + SwapChain<Texture, 2> combined_tx_; + /** Weight buffers. Double buffered to allow updating it during accumulation. */ + SwapChain<Texture, 2> weight_tx_; + /** User setting to disable reprojection. Useful for debugging or have a more precise render. */ + bool force_disable_reprojection_ = false; + + PassSimple accumulate_ps_ = {"Film.Accumulate"}; + PassSimple cryptomatte_post_ps_ = {"Film.Cryptomatte.Post"}; + + FilmDataBuf data_; + + eViewLayerEEVEEPassType enabled_passes_ = eViewLayerEEVEEPassType(0); + + public: + Film(Instance &inst) : inst_(inst){}; + ~Film(){}; + + void init(const int2 &full_extent, const rcti *output_rect); + + void sync(); + void end_sync(); + + /** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */ + void accumulate(const DRWView *view, GPUTexture *combined_final_tx); + + /** Sort and normalize cryptomatte samples. */ + void cryptomatte_sort(); + + /** Blit to display. No rendered sample needed. */ + void display(); + + float *read_pass(eViewLayerEEVEEPassType pass_type, int layer_offset); + float *read_aov(ViewLayerAOV *aov); + + /** Returns shading views internal resolution. 
*/ + int2 render_extent_get() const + { + return data_.render_extent; + } + + float2 pixel_jitter_get() const; + + float background_opacity_get() const + { + return data_.background_opacity; + } + + eViewLayerEEVEEPassType enabled_passes_get() const; + int cryptomatte_layer_max_get() const; + int cryptomatte_layer_len_get() const; + + static ePassStorageType pass_storage_type(eViewLayerEEVEEPassType pass_type) + { + switch (pass_type) { + case EEVEE_RENDER_PASS_Z: + case EEVEE_RENDER_PASS_MIST: + case EEVEE_RENDER_PASS_SHADOW: + case EEVEE_RENDER_PASS_AO: + return PASS_STORAGE_VALUE; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + return PASS_STORAGE_CRYPTOMATTE; + default: + return PASS_STORAGE_COLOR; + } + } + + static bool pass_is_float3(eViewLayerEEVEEPassType pass_type) + { + switch (pass_type) { + case EEVEE_RENDER_PASS_NORMAL: + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + case EEVEE_RENDER_PASS_EMIT: + case EEVEE_RENDER_PASS_ENVIRONMENT: + return true; + default: + return false; + } + } + + /* Returns layer offset in the accumulation texture. -1 if the pass is not enabled. 
*/ + int pass_id_get(eViewLayerEEVEEPassType pass_type) const + { + switch (pass_type) { + case EEVEE_RENDER_PASS_COMBINED: + return data_.combined_id; + case EEVEE_RENDER_PASS_Z: + return data_.depth_id; + case EEVEE_RENDER_PASS_MIST: + return data_.mist_id; + case EEVEE_RENDER_PASS_NORMAL: + return data_.normal_id; + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + return data_.diffuse_light_id; + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + return data_.diffuse_color_id; + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + return data_.specular_light_id; + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + return data_.specular_color_id; + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + return data_.volume_light_id; + case EEVEE_RENDER_PASS_EMIT: + return data_.emission_id; + case EEVEE_RENDER_PASS_ENVIRONMENT: + return data_.environment_id; + case EEVEE_RENDER_PASS_SHADOW: + return data_.shadow_id; + case EEVEE_RENDER_PASS_AO: + return data_.ambient_occlusion_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + return data_.cryptomatte_object_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + return data_.cryptomatte_asset_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + return data_.cryptomatte_material_id; + case EEVEE_RENDER_PASS_VECTOR: + return data_.vector_id; + default: + return -1; + } + } + + static const Vector<std::string> pass_to_render_pass_names(eViewLayerEEVEEPassType pass_type, + const ViewLayer *view_layer) + { + Vector<std::string> result; + + auto build_cryptomatte_passes = [&](const char *pass_name) { + const int num_cryptomatte_passes = (view_layer->cryptomatte_levels + 1) / 2; + for (int pass = 0; pass < num_cryptomatte_passes; pass++) { + std::stringstream ss; + ss.fill('0'); + ss << pass_name; + ss.width(2); + ss << pass; + result.append(ss.str()); + } + }; + + switch (pass_type) { + case EEVEE_RENDER_PASS_COMBINED: + result.append(RE_PASSNAME_COMBINED); + break; + case EEVEE_RENDER_PASS_Z: + result.append(RE_PASSNAME_Z); + break; + case EEVEE_RENDER_PASS_MIST: + 
result.append(RE_PASSNAME_MIST); + break; + case EEVEE_RENDER_PASS_NORMAL: + result.append(RE_PASSNAME_NORMAL); + break; + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + result.append(RE_PASSNAME_DIFFUSE_DIRECT); + break; + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + result.append(RE_PASSNAME_DIFFUSE_COLOR); + break; + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + result.append(RE_PASSNAME_GLOSSY_DIRECT); + break; + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + result.append(RE_PASSNAME_GLOSSY_COLOR); + break; + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + result.append(RE_PASSNAME_VOLUME_LIGHT); + break; + case EEVEE_RENDER_PASS_EMIT: + result.append(RE_PASSNAME_EMIT); + break; + case EEVEE_RENDER_PASS_ENVIRONMENT: + result.append(RE_PASSNAME_ENVIRONMENT); + break; + case EEVEE_RENDER_PASS_SHADOW: + result.append(RE_PASSNAME_SHADOW); + break; + case EEVEE_RENDER_PASS_AO: + result.append(RE_PASSNAME_AO); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_OBJECT); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_ASSET); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_MATERIAL); + break; + case EEVEE_RENDER_PASS_VECTOR: + result.append(RE_PASSNAME_VECTOR); + break; + default: + BLI_assert(0); + break; + } + return result; + } + + private: + void init_aovs(); + void sync_mist(); + + /** + * Precompute sample weights if they are uniform across the whole film extent. + */ + void update_sample_table(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc new file mode 100644 index 00000000000..cf9049da514 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
+ */ + +#include "BKE_global.h" + +#include "eevee_instance.hh" + +#include "eevee_hizbuffer.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * + * \{ */ + +void HiZBuffer::sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + int2 render_extent = inst_.film.render_extent_get(); + /* Padding to avoid complexity during down-sampling and screen tracing. */ + int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1))); + int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE)); + + hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT); + hiz_tx_.ensure_mip_views(); + GPU_texture_mipmap_mode(hiz_tx_, true, false); + + data_.uv_scale = float2(render_extent) / float2(hiz_extent); + data_.push_update(); + + { + hiz_update_ps_.init(); + hiz_update_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE)); + hiz_update_ps_.bind_ssbo("finished_tile_counter", atomic_tile_counter_); + hiz_update_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, with_filter); + hiz_update_ps_.bind_image("out_mip_0", hiz_tx_.mip_view(0)); + hiz_update_ps_.bind_image("out_mip_1", hiz_tx_.mip_view(1)); + hiz_update_ps_.bind_image("out_mip_2", hiz_tx_.mip_view(2)); + hiz_update_ps_.bind_image("out_mip_3", hiz_tx_.mip_view(3)); + hiz_update_ps_.bind_image("out_mip_4", hiz_tx_.mip_view(4)); + hiz_update_ps_.bind_image("out_mip_5", hiz_tx_.mip_view(5)); + hiz_update_ps_.bind_image("out_mip_6", hiz_tx_.mip_view(6)); + hiz_update_ps_.bind_image("out_mip_7", hiz_tx_.mip_view(7)); + /* TODO(@fclem): There might be occasions where we might not want to + * copy mip 0 for performance reasons if there is no need for it. 
*/ + hiz_update_ps_.push_constant("update_mip_0", true); + hiz_update_ps_.dispatch(int3(dispatch_size, 1)); + hiz_update_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); + } + + if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) { + debug_draw_ps_.init(); + debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); + debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_DEBUG)); + this->bind_resources(&debug_draw_ps_); + debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } +} + +void HiZBuffer::update() +{ + if (!is_dirty_) { + return; + } + + /* Bind another framebuffer in order to avoid triggering the feedback loop check. + * This is safe because we only use compute shaders in this section of the code. + * Ideally the check should be smarter. */ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_restore(); + } + + inst_.manager->submit(hiz_update_ps_); + + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_bind(fb); + } +} + +void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb) +{ + if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) { + inst_.info = + "Debug Mode: HiZ Validation\n" + " - Red: pixel in front of HiZ tile value.\n" + " - Blue: No error."; + inst_.hiz_buffer.update(); + GPU_framebuffer_bind(view_fb); + inst_.manager->submit(debug_draw_ps_, view); + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh new file mode 100644 index 00000000000..8b8e4de55b1 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The Hierarchical-Z buffer is texture containing a copy of the depth buffer with mipmaps. + * Each mip contains the maximum depth of each 4 pixels on the upper level. 
+ * The size of the texture is padded to avoid messing with the mipmap pixels alignments. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * \{ */ + +class HiZBuffer { + private: + Instance &inst_; + + /** The texture containing the hiz mip chain. */ + Texture hiz_tx_ = {"hiz_tx_"}; + /** + * Atomic counter counting the number of tile that have finished down-sampling. + * The last one will process the last few mip level. + */ + draw::StorageBuffer<uint4, true> atomic_tile_counter_ = {"atomic_tile_counter"}; + /** Single pass recursive downsample. */ + PassSimple hiz_update_ps_ = {"HizUpdate"}; + /** Debug pass. */ + PassSimple debug_draw_ps_ = {"HizUpdate.Debug"}; + /** Dirty flag to check if the update is necessary. */ + bool is_dirty_ = true; + + HiZDataBuf data_; + + public: + HiZBuffer(Instance &inst) : inst_(inst) + { + atomic_tile_counter_.clear_to_zero(); + }; + + void sync(); + + /** + * Tag the buffer for update if needed. + */ + void set_dirty() + { + is_dirty_ = true; + } + + /** + * Update the content of the HiZ buffer with the depth render target. + * Noop if the buffer has not been tagged as dirty. + * Should be called before each passes that needs to read the hiz buffer. + */ + void update(); + + void debug_draw(View &view, GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_); + DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_); + } + + /* TODO(fclem): Hardcoded bind slots. 
*/ + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + pass->bind_texture("hiz_tx", &hiz_tx_); + pass->bind_ubo("hiz_buf", &data_); + } +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index 606630bcdef..8005b27c30e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -17,6 +17,7 @@ #include "DNA_ID.h" #include "DNA_lightprobe_types.h" #include "DNA_modifier_types.h" +#include "RE_pipeline.h" #include "eevee_instance.hh" @@ -43,7 +44,7 @@ void Instance::init(const int2 &output_res, const View3D *v3d_, const RegionView3D *rv3d_) { - UNUSED_VARS(light_probe_, output_rect); + UNUSED_VARS(light_probe_); render = render_; depsgraph = depsgraph_; camera_orig_object = camera_object_; @@ -51,12 +52,23 @@ void Instance::init(const int2 &output_res, drw_view = drw_view_; v3d = v3d_; rv3d = rv3d_; + manager = DRW_manager_get(); + + if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) { + sampling.reset(); + } info = ""; update_eval_members(); - main_view.init(output_res); + sampling.init(scene); + camera.init(); + film.init(output_res, output_rect); + velocity.init(); + depth_of_field.init(); + motion_blur.init(); + main_view.init(); } void Instance::set_time(float time) @@ -88,16 +100,24 @@ void Instance::update_eval_members() void Instance::begin_sync() { materials.begin_sync(); - velocity.begin_sync(); + velocity.begin_sync(); /* NOTE: Also syncs camera. 
*/ + lights.begin_sync(); + cryptomatte.begin_sync(); + gpencil_engine_enabled = false; + + depth_of_field.sync(); + motion_blur.sync(); + hiz_buffer.sync(); pipelines.sync(); main_view.sync(); world.sync(); + film.sync(); } void Instance::object_sync(Object *ob) { - const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH); + const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH, OB_LAMP); const int ob_visibility = DRW_object_visibility_in_active_context(ob); const bool partsys_is_visible = (ob_visibility & OB_VISIBLE_PARTICLES) != 0 && (ob->type == OB_MESH); @@ -108,12 +128,16 @@ void Instance::object_sync(Object *ob) return; } + /* TODO cleanup. */ + ObjectRef ob_ref = DRW_object_ref_get(ob); + ResourceHandle res_handle = manager->resource_handle(ob_ref); + ObjectHandle &ob_handle = sync.sync_object(ob); if (partsys_is_visible && ob != DRW_context_state_get()->object_edit) { LISTBASE_FOREACH (ModifierData *, md, &ob->modifiers) { if (md->type == eModifierType_ParticleSystem) { - sync.sync_curves(ob, ob_handle, md); + sync.sync_curves(ob, ob_handle, res_handle, md); } } } @@ -121,22 +145,18 @@ void Instance::object_sync(Object *ob) if (object_is_visible) { switch (ob->type) { case OB_LAMP: + lights.sync_light(ob, ob_handle); break; case OB_MESH: - case OB_CURVES_LEGACY: - case OB_SURF: - case OB_FONT: - case OB_MBALL: { - sync.sync_mesh(ob, ob_handle); + sync.sync_mesh(ob, ob_handle, res_handle, ob_ref); break; - } case OB_VOLUME: break; case OB_CURVES: - sync.sync_curves(ob, ob_handle); + sync.sync_curves(ob, ob_handle, res_handle); break; case OB_GPENCIL: - sync.sync_gpencil(ob, ob_handle); + sync.sync_gpencil(ob, ob_handle, res_handle); break; default: break; @@ -146,13 +166,38 @@ void Instance::object_sync(Object *ob) ob_handle.reset_recalc_flag(); } +/* Wrapper to use with DRW_render_object_iter. 
*/ +void Instance::object_sync_render(void *instance_, + Object *ob, + RenderEngine *engine, + Depsgraph *depsgraph) +{ + UNUSED_VARS(engine, depsgraph); + Instance &inst = *reinterpret_cast<Instance *>(instance_); + inst.object_sync(ob); +} + void Instance::end_sync() { velocity.end_sync(); + lights.end_sync(); + sampling.end_sync(); + film.end_sync(); + cryptomatte.end_sync(); } void Instance::render_sync() { + DRW_cache_restart(); + + begin_sync(); + DRW_render_object_iter(this, render, depsgraph, object_sync_render); + end_sync(); + + DRW_render_instance_buffer_finish(); + /* Also we weed to have a correct FBO bound for #DRW_hair_update */ + // GPU_framebuffer_bind(); + // DRW_hair_update(); } /** \} */ @@ -167,7 +212,65 @@ void Instance::render_sync() **/ void Instance::render_sample() { + if (sampling.finished_viewport()) { + film.display(); + return; + } + + /* Motion blur may need to do re-sync after a certain number of sample. */ + if (!is_viewport() && sampling.do_render_sync()) { + render_sync(); + } + + sampling.step(); + main_view.render(); + + motion_blur.step(); +} + +void Instance::render_read_result(RenderLayer *render_layer, const char *view_name) +{ + eViewLayerEEVEEPassType pass_bits = film.enabled_passes_get(); + for (auto i : IndexRange(EEVEE_RENDER_PASS_MAX_BIT)) { + eViewLayerEEVEEPassType pass_type = eViewLayerEEVEEPassType(pass_bits & (1 << i)); + if (pass_type == 0) { + continue; + } + + Vector<std::string> pass_names = Film::pass_to_render_pass_names(pass_type, view_layer); + for (int64_t pass_offset : IndexRange(pass_names.size())) { + RenderPass *rp = RE_pass_find_by_name( + render_layer, pass_names[pass_offset].c_str(), view_name); + if (!rp) { + continue; + } + float *result = film.read_pass(pass_type, pass_offset); + + if (result) { + BLI_mutex_lock(&render->update_render_passes_mutex); + /* WORKAROUND: We use texture read to avoid using a framebuffer to get the render result. 
+ * However, on some implementation, we need a buffer with a few extra bytes for the read to + * happen correctly (see GLTexture::read()). So we need a custom memory allocation. */ + /* Avoid memcpy(), replace the pointer directly. */ + MEM_SAFE_FREE(rp->rect); + rp->rect = result; + BLI_mutex_unlock(&render->update_render_passes_mutex); + } + } + } + + /* The vector pass is initialized to weird values. Set it to neutral value if not rendered. */ + if ((pass_bits & EEVEE_RENDER_PASS_VECTOR) == 0) { + for (std::string vector_pass_name : + Film::pass_to_render_pass_names(EEVEE_RENDER_PASS_VECTOR, view_layer)) { + RenderPass *vector_rp = RE_pass_find_by_name( + render_layer, vector_pass_name.c_str(), view_name); + if (vector_rp) { + memset(vector_rp->rect, 0, sizeof(float) * 4 * vector_rp->rectx * vector_rp->recty); + } + } + } } /** \} */ @@ -178,7 +281,28 @@ void Instance::render_sample() void Instance::render_frame(RenderLayer *render_layer, const char *view_name) { - UNUSED_VARS(render_layer, view_name); + while (!sampling.finished()) { + this->render_sample(); + + /* TODO(fclem) print progression. */ +#if 0 + /* TODO(fclem): Does not currently work. But would be better to just display to 2D view like + * cycles does. */ + if (G.background == false && first_read) { + /* Allow to preview the first sample. */ + /* TODO(fclem): Might want to not do this during animation render to avoid too much stall. */ + this->render_read_result(render_layer, view_name); + first_read = false; + DRW_render_context_disable(render->re); + /* Allow the 2D viewport to grab the ticket mutex to display the render. 
*/ + DRW_render_context_enable(render->re); + } +#endif + } + + this->film.cryptomatte_sort(); + + this->render_read_result(render_layer, view_name); } void Instance::draw_viewport(DefaultFramebufferList *dfbl) @@ -187,6 +311,13 @@ void Instance::draw_viewport(DefaultFramebufferList *dfbl) render_sample(); velocity.step_swap(); + /* Do not request redraw during viewport animation to lock the framerate to the animation + * playback rate. This is in order to preserve motion blur aspect and also to avoid TAA reset + * that can show flickering. */ + if (!sampling.finished_viewport() && !DRW_state_is_playback()) { + DRW_viewport_request_redraw(); + } + if (materials.queued_shaders_count > 0) { std::stringstream ss; ss << "Compiling Shaders " << materials.queued_shaders_count; @@ -194,6 +325,76 @@ void Instance::draw_viewport(DefaultFramebufferList *dfbl) } } +void Instance::store_metadata(RenderResult *render_result) +{ + cryptomatte.store_metadata(render_result); +} + +void Instance::update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) +{ + RE_engine_register_pass(engine, scene, view_layer, RE_PASSNAME_COMBINED, 4, "RGBA", SOCK_RGBA); + +#define CHECK_PASS_LEGACY(name, type, channels, chanid) \ + if (view_layer->passflag & (SCE_PASS_##name)) { \ + RE_engine_register_pass( \ + engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \ + } \ + ((void)0) +#define CHECK_PASS_EEVEE(name, type, channels, chanid) \ + if (view_layer->eevee.render_passes & (EEVEE_RENDER_PASS_##name)) { \ + RE_engine_register_pass( \ + engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \ + } \ + ((void)0) + + CHECK_PASS_LEGACY(Z, SOCK_FLOAT, 1, "Z"); + CHECK_PASS_LEGACY(MIST, SOCK_FLOAT, 1, "Z"); + CHECK_PASS_LEGACY(NORMAL, SOCK_VECTOR, 3, "XYZ"); + CHECK_PASS_LEGACY(DIFFUSE_DIRECT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(DIFFUSE_COLOR, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(GLOSSY_DIRECT, SOCK_RGBA, 3, "RGB"); + 
CHECK_PASS_LEGACY(GLOSSY_COLOR, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_EEVEE(VOLUME_LIGHT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(EMIT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(ENVIRONMENT, SOCK_RGBA, 3, "RGB"); + /* TODO: CHECK_PASS_LEGACY(SHADOW, SOCK_RGBA, 3, "RGB"); + * CHECK_PASS_LEGACY(AO, SOCK_RGBA, 3, "RGB"); + * When available they should be converted from Value textures to RGB. */ + + LISTBASE_FOREACH (ViewLayerAOV *, aov, &view_layer->aovs) { + if ((aov->flag & AOV_CONFLICT) != 0) { + continue; + } + switch (aov->type) { + case AOV_TYPE_COLOR: + RE_engine_register_pass(engine, scene, view_layer, aov->name, 4, "RGBA", SOCK_RGBA); + break; + case AOV_TYPE_VALUE: + RE_engine_register_pass(engine, scene, view_layer, aov->name, 1, "X", SOCK_FLOAT); + break; + default: + break; + } + } + + /* NOTE: Name channels lowercase `rgba` so that compression rules check in OpenEXR DWA code uses + * lossless compression. Reportedly this naming is the only one which works good from the + * interoperability point of view. Using `xyzw` naming is not portable. 
*/ + auto register_cryptomatte_passes = [&](eViewLayerCryptomatteFlags cryptomatte_layer, + eViewLayerEEVEEPassType eevee_pass) { + if (view_layer->cryptomatte_flag & cryptomatte_layer) { + for (std::string pass_name : Film::pass_to_render_pass_names(eevee_pass, view_layer)) { + RE_engine_register_pass( + engine, scene, view_layer, pass_name.c_str(), 4, "rgba", SOCK_RGBA); + } + } + }; + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_OBJECT, EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_ASSET, EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_MATERIAL, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); +} + /** \} */ } // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index 84be59fc5f0..c8eecbd812d 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -16,8 +16,16 @@ #include "DRW_render.h" #include "eevee_camera.hh" +#include "eevee_cryptomatte.hh" +#include "eevee_depth_of_field.hh" +#include "eevee_film.hh" +#include "eevee_hizbuffer.hh" +#include "eevee_light.hh" #include "eevee_material.hh" +#include "eevee_motion_blur.hh" #include "eevee_pipeline.hh" +#include "eevee_renderbuffers.hh" +#include "eevee_sampling.hh" #include "eevee_shader.hh" #include "eevee_sync.hh" #include "eevee_view.hh" @@ -31,19 +39,29 @@ namespace blender::eevee { */ class Instance { friend VelocityModule; + friend MotionBlurModule; public: ShaderModule &shaders; SyncModule sync; MaterialModule materials; PipelineModule pipelines; + LightModule lights; VelocityModule velocity; + MotionBlurModule motion_blur; + DepthOfField depth_of_field; + Cryptomatte cryptomatte; + HiZBuffer hiz_buffer; + Sampling sampling; Camera camera; + Film film; + RenderBuffers render_buffers; MainView main_view; World world; /** Input data. 
*/ Depsgraph *depsgraph; + Manager *manager; /** Evaluated IDs. */ Scene *scene; ViewLayer *view_layer; @@ -57,8 +75,13 @@ class Instance { const View3D *v3d; const RegionView3D *rv3d; - /* Info string displayed at the top of the render / viewport. */ + /** True if the grease pencil engine might be running. */ + bool gpencil_engine_enabled; + + /** Info string displayed at the top of the render / viewport. */ std::string info = ""; + /** Debug mode from debug value. */ + eDebugMode debug_mode = eDebugMode::DEBUG_NONE; public: Instance() @@ -66,8 +89,16 @@ class Instance { sync(*this), materials(*this), pipelines(*this), + lights(*this), velocity(*this), + motion_blur(*this), + depth_of_field(*this), + cryptomatte(*this), + hiz_buffer(*this), + sampling(*this), camera(*this), + film(*this), + render_buffers(*this), main_view(*this), world(*this){}; ~Instance(){}; @@ -89,15 +120,23 @@ class Instance { void render_sync(); void render_frame(RenderLayer *render_layer, const char *view_name); + void store_metadata(RenderResult *render_result); void draw_viewport(DefaultFramebufferList *dfbl); - bool is_viewport(void) + static void update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer); + + bool is_viewport() const + { + return render == nullptr; + } + + bool overlays_enabled() const { - return !DRW_state_is_scene_render(); + return v3d && ((v3d->flag2 & V3D_HIDE_OVERLAYS) == 0); } - bool use_scene_lights(void) const + bool use_scene_lights() const { return (!v3d) || ((v3d->shading.type == OB_MATERIAL) && @@ -107,7 +146,7 @@ class Instance { } /* Light the scene using the selected HDRI in the viewport shading pop-over. 
*/ - bool use_studio_light(void) const + bool use_studio_light() const { return (v3d) && (((v3d->shading.type == OB_MATERIAL) && ((v3d->shading.flag & V3D_SHADING_SCENE_WORLD) == 0)) || @@ -116,7 +155,12 @@ class Instance { } private: + static void object_sync_render(void *instance_, + Object *ob, + RenderEngine *engine, + Depsgraph *depsgraph); void render_sample(); + void render_read_result(RenderLayer *render_layer, const char *view_name); void mesh_sync(Object *ob, ObjectHandle &ob_handle); diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc new file mode 100644 index 00000000000..b60246fa3ab --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.cc @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. + */ + +#include "draw_debug.hh" + +#include "eevee_instance.hh" + +#include "eevee_light.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name LightData + * \{ */ + +static eLightType to_light_type(short blender_light_type, short blender_area_type) +{ + switch (blender_light_type) { + default: + case LA_LOCAL: + return LIGHT_POINT; + case LA_SUN: + return LIGHT_SUN; + case LA_SPOT: + return LIGHT_SPOT; + case LA_AREA: + return ELEM(blender_area_type, LA_AREA_DISK, LA_AREA_ELLIPSE) ? 
LIGHT_ELLIPSE : LIGHT_RECT; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +void Light::sync(/* ShadowModule &shadows , */ const Object *ob, float threshold) +{ + const ::Light *la = (const ::Light *)ob->data; + float scale[3]; + + float max_power = max_fff(la->r, la->g, la->b) * fabsf(la->energy / 100.0f); + float surface_max_power = max_ff(la->diff_fac, la->spec_fac) * max_power; + float volume_max_power = la->volume_fac * max_power; + + float influence_radius_surface = attenuation_radius_get(la, threshold, surface_max_power); + float influence_radius_volume = attenuation_radius_get(la, threshold, volume_max_power); + + this->influence_radius_max = max_ff(influence_radius_surface, influence_radius_volume); + this->influence_radius_invsqr_surface = 1.0f / square_f(max_ff(influence_radius_surface, 1e-8f)); + this->influence_radius_invsqr_volume = 1.0f / square_f(max_ff(influence_radius_volume, 1e-8f)); + + this->color = float3(&la->r) * la->energy; + normalize_m4_m4_ex(this->object_mat.ptr(), ob->obmat, scale); + /* Make sure we have consistent handedness (in case of negatively scaled Z axis). 
*/ + float3 cross = math::cross(float3(this->_right), float3(this->_up)); + if (math::dot(cross, float3(this->_back)) < 0.0f) { + negate_v3(this->_up); + } + + shape_parameters_set(la, scale); + + float shape_power = shape_power_get(la); + float point_power = point_power_get(la); + this->diffuse_power = la->diff_fac * shape_power; + this->transmit_power = la->diff_fac * point_power; + this->specular_power = la->spec_fac * shape_power; + this->volume_power = la->volume_fac * point_power; + + eLightType new_type = to_light_type(la->type, la->area_shape); + if (this->type != new_type) { + /* shadow_discard_safe(shadows); */ + this->type = new_type; + } + +#if 0 + if (la->mode & LA_SHADOW) { + if (la->type == LA_SUN) { + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.directionals.alloc(); + } + + ShadowDirectional &shadow = shadows.directionals[this->shadow_id]; + shadow.sync(this->object_mat, la->bias * 0.05f, 1.0f); + } + else { + float cone_aperture = DEG2RAD(360.0); + if (la->type == LA_SPOT) { + cone_aperture = min_ff(DEG2RAD(179.9), la->spotsize); + } + else if (la->type == LA_AREA) { + cone_aperture = DEG2RAD(179.9); + } + + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.punctuals.alloc(); + } + + ShadowPunctual &shadow = shadows.punctuals[this->shadow_id]; + shadow.sync(this->type, + this->object_mat, + cone_aperture, + la->clipsta, + this->influence_radius_max, + la->bias * 0.05f); + } + } + else { + shadow_discard_safe(shadows); + } +#endif + + this->initialized = true; +} + +#if 0 +void Light::shadow_discard_safe(ShadowModule &shadows) +{ + if (shadow_id != LIGHT_NO_SHADOW) { + if (this->type != LIGHT_SUN) { + shadows.punctuals.free(shadow_id); + } + else { + shadows.directionals.free(shadow_id); + } + shadow_id = LIGHT_NO_SHADOW; + } +} +#endif + +/* Returns attenuation radius inverted & squared for easy bound checking inside the shader. 
*/ +float Light::attenuation_radius_get(const ::Light *la, float light_threshold, float light_power) +{ + if (la->type == LA_SUN) { + return (light_power > 1e-5f) ? 1e16f : 0.0f; + } + + if (la->mode & LA_CUSTOM_ATTENUATION) { + return la->att_dist; + } + /* Compute the distance (using the inverse square law) + * at which the light power reaches the light_threshold. */ + /* TODO take area light scale into account. */ + return sqrtf(light_power / light_threshold); +} + +void Light::shape_parameters_set(const ::Light *la, const float scale[3]) +{ + if (la->type == LA_AREA) { + float area_size_y = (ELEM(la->area_shape, LA_AREA_RECT, LA_AREA_ELLIPSE)) ? la->area_sizey : + la->area_size; + _area_size_x = max_ff(0.003f, la->area_size * scale[0] * 0.5f); + _area_size_y = max_ff(0.003f, area_size_y * scale[1] * 0.5f); + /* For volume point lighting. */ + radius_squared = max_ff(0.001f, hypotf(_area_size_x, _area_size_y) * 0.5f); + radius_squared = square_f(radius_squared); + } + else { + if (la->type == LA_SPOT) { + /* Spot size & blend */ + spot_size_inv[0] = scale[2] / scale[0]; + spot_size_inv[1] = scale[2] / scale[1]; + float spot_size = cosf(la->spotsize * 0.5f); + float spot_blend = (1.0f - spot_size) * la->spotblend; + _spot_mul = 1.0f / max_ff(1e-8f, spot_blend); + _spot_bias = -spot_size * _spot_mul; + spot_tan = tanf(min_ff(la->spotsize * 0.5f, M_PI_2 - 0.0001f)); + } + + if (la->type == LA_SUN) { + _area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f); + } + else { + _area_size_x = la->area_size; + } + _area_size_y = _area_size_x = max_ff(0.001f, _area_size_x); + radius_squared = square_f(_area_size_x); + } +} + +float Light::shape_power_get(const ::Light *la) +{ + /* Make illumination power constant */ + switch (la->type) { + case LA_AREA: { + float area = _area_size_x * _area_size_y; + float power = 1.0f / (area * 4.0f * float(M_PI)); + /* FIXME : Empirical, Fit cycles power */ + power *= 0.8f; + if (ELEM(la->area_shape, LA_AREA_DISK, 
LA_AREA_ELLIPSE)) { + /* Scale power to account for the lower area of the ellipse compared to the surrounding + * rectangle. */ + power *= 4.0f / M_PI; + } + return power; + } + case LA_SPOT: + case LA_LOCAL: { + return 1.0f / (4.0f * square_f(_radius) * float(M_PI * M_PI)); + } + default: + case LA_SUN: { + float power = 1.0f / (square_f(_radius) * float(M_PI)); + /* Make illumination power closer to cycles for bigger radii. Cycles uses a cos^3 term that + * we cannot reproduce so we account for that by scaling the light power. This function is + * the result of a rough manual fitting. */ + /* Simplification of: power *= 1 + r²/2 */ + power += 1.0f / (2.0f * M_PI); + + return power; + } + } +} + +float Light::point_power_get(const ::Light *la) +{ + /* Volume light is evaluated as point lights. Remove the shape power. */ + switch (la->type) { + case LA_AREA: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + float power = 0.0792f * M_PI; + + /* This corrects for area light most representative point trick. The fit was found by + * reducing the average error compared to cycles. */ + float area = _area_size_x * _area_size_y; + float tmp = M_PI_2 / (M_PI_2 + sqrtf(area)); + /* Lerp between 1.0 and the limit (1 / pi). */ + power *= tmp + (1.0f - tmp) * M_1_PI; + + return power; + } + case LA_SPOT: + case LA_LOCAL: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + return 0.0792f; + } + default: + case LA_SUN: { + return 1.0f; + } + } +} + +void Light::debug_draw() +{ +#ifdef DEBUG + drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f)); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +void LightModule::begin_sync() +{ + use_scene_lights_ = inst_.use_scene_lights(); + + /* In begin_sync so it can be animated. 
*/ + if (assign_if_different(light_threshold_, max_ff(1e-16f, inst_.scene->eevee.light_threshold))) { + inst_.sampling.reset(); + } + + sun_lights_len_ = 0; + local_lights_len_ = 0; +} + +void LightModule::sync_light(const Object *ob, ObjectHandle &handle) +{ + if (use_scene_lights_ == false) { + return; + } + Light &light = light_map_.lookup_or_add_default(handle.object_key); + light.used = true; + if (handle.recalc != 0 || !light.initialized) { + light.sync(/* inst_.shadows, */ ob, light_threshold_); + } + sun_lights_len_ += int(light.type == LIGHT_SUN); + local_lights_len_ += int(light.type != LIGHT_SUN); +} + +void LightModule::end_sync() +{ + // ShadowModule &shadows = inst_.shadows; + + /* NOTE: We resize this buffer before removing deleted lights. */ + int lights_allocated = ceil_to_multiple_u(max_ii(light_map_.size(), 1), LIGHT_CHUNK); + light_buf_.resize(lights_allocated); + + /* Track light deletion. */ + Vector<ObjectKey, 0> deleted_keys; + /* Indices inside GPU data array. */ + int sun_lights_idx = 0; + int local_lights_idx = sun_lights_len_; + + /* Fill GPU data with scene data. */ + for (auto item : light_map_.items()) { + Light &light = item.value; + + if (!light.used) { + /* Deleted light. */ + deleted_keys.append(item.key); + // light.shadow_discard_safe(shadows); + continue; + } + + int dst_idx = (light.type == LIGHT_SUN) ? sun_lights_idx++ : local_lights_idx++; + /* Put all light data into global data SSBO. */ + light_buf_[dst_idx] = light; + +#if 0 + if (light.shadow_id != LIGHT_NO_SHADOW) { + if (light.type == LIGHT_SUN) { + light_buf_[dst_idx].shadow_data = shadows.directionals[light.shadow_id]; + } + else { + light_buf_[dst_idx].shadow_data = shadows.punctuals[light.shadow_id]; + } + } +#endif + /* Untag for next sync. */ + light.used = false; + } + /* This scene data buffer is then immutable after this point. 
*/ + light_buf_.push_update(); + + for (auto &key : deleted_keys) { + light_map_.remove(key); + } + + /* Update sampling on deletion or un-hiding (use_scene_lights). */ + if (assign_if_different(light_map_size_, light_map_.size())) { + inst_.sampling.reset(); + } + + /* If exceeding the limit, just trim off the excess to avoid glitchy rendering. */ + if (sun_lights_len_ + local_lights_len_ > CULLING_MAX_ITEM) { + sun_lights_len_ = min_ii(sun_lights_len_, CULLING_MAX_ITEM); + local_lights_len_ = min_ii(local_lights_len_, CULLING_MAX_ITEM - sun_lights_len_); + inst_.info = "Error: Too many lights in the scene."; + } + lights_len_ = sun_lights_len_ + local_lights_len_; + + /* Resize to the actual number of lights after pruning. */ + lights_allocated = ceil_to_multiple_u(max_ii(lights_len_, 1), LIGHT_CHUNK); + culling_key_buf_.resize(lights_allocated); + culling_zdist_buf_.resize(lights_allocated); + culling_light_buf_.resize(lights_allocated); + + { + /* Compute tile size and total word count. */ + uint word_per_tile = divide_ceil_u(max_ii(lights_len_, 1), 32); + int2 render_extent = inst_.film.render_extent_get(); + int2 tiles_extent; + /* Default to 32 as this is likely to be the maximum + * tile size used by hardware or compute shading. */ + uint tile_size = 16; + do { + tile_size *= 2; + tiles_extent = math::divide_ceil(render_extent, int2(tile_size)); + uint tile_count = tiles_extent.x * tiles_extent.y; + if (tile_count > max_tile_count_threshold) { + continue; + } + total_word_count_ = tile_count * word_per_tile; + + } while (total_word_count_ > max_word_count_threshold); + /* Keep aligned with storage buffer requirements. 
*/ + total_word_count_ = ceil_to_multiple_u(total_word_count_, 32); + + culling_data_buf_.tile_word_len = word_per_tile; + culling_data_buf_.tile_size = tile_size; + culling_data_buf_.tile_x_len = tiles_extent.x; + culling_data_buf_.tile_y_len = tiles_extent.y; + culling_data_buf_.items_count = lights_len_; + culling_data_buf_.local_lights_len = local_lights_len_; + culling_data_buf_.sun_lights_len = sun_lights_len_; + } + culling_tile_buf_.resize(total_word_count_); + + culling_pass_sync(); + debug_pass_sync(); +} + +void LightModule::culling_pass_sync() +{ + uint safe_lights_len = max_ii(lights_len_, 1); + uint culling_select_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SELECT_GROUP_SIZE); + uint culling_sort_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SORT_GROUP_SIZE); + uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE); + + /* NOTE: We reference the buffers that may be resized or updated later. */ + + culling_ps_.init(); + { + auto &sub = culling_ps_.sub("Select"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("in_light_buf", light_buf_); + sub.bind_ssbo("out_light_buf", culling_light_buf_); + sub.bind_ssbo("out_zdist_buf", culling_zdist_buf_); + sub.bind_ssbo("out_key_buf", culling_key_buf_); + sub.dispatch(int3(culling_select_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Sort"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SORT)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("in_light_buf", light_buf_); + sub.bind_ssbo("out_light_buf", culling_light_buf_); + sub.bind_ssbo("in_zdist_buf", culling_zdist_buf_); + sub.bind_ssbo("in_key_buf", culling_key_buf_); + sub.dispatch(int3(culling_sort_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Zbin"); 
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("light_buf", culling_light_buf_); + sub.bind_ssbo("out_zbin_buf", culling_zbin_buf_); + sub.dispatch(int3(1, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Tiles"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_TILE)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("light_buf", culling_light_buf_); + sub.bind_ssbo("out_light_tile_buf", culling_tile_buf_); + sub.dispatch(int3(culling_tile_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } +} + +void LightModule::debug_pass_sync() +{ + if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) { + debug_draw_ps_.init(); + debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); + debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG)); + inst_.hiz_buffer.bind_resources(&debug_draw_ps_); + debug_draw_ps_.bind_ssbo("light_buf", &culling_light_buf_); + debug_draw_ps_.bind_ssbo("light_cull_buf", &culling_data_buf_); + debug_draw_ps_.bind_ssbo("light_zbin_buf", &culling_zbin_buf_); + debug_draw_ps_.bind_ssbo("light_tile_buf", &culling_tile_buf_); + debug_draw_ps_.bind_texture("depth_tx", &inst_.render_buffers.depth_tx); + debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } +} + +void LightModule::set_view(View &view, const int2 extent) +{ + float far_z = view.far_clip(); + float near_z = view.near_clip(); + + culling_data_buf_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z); + culling_data_buf_.zbin_bias = -near_z * culling_data_buf_.zbin_scale; + culling_data_buf_.tile_to_uv_fac = (culling_data_buf_.tile_size / float2(extent)); + culling_data_buf_.visible_count = 0; + culling_data_buf_.push_update(); + + inst_.manager->submit(culling_ps_, view); +} + +void LightModule::debug_draw(View &view, GPUFrameBuffer *view_fb) +{ + if 
(inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) { + inst_.info = "Debug Mode: Light Culling Validation"; + inst_.hiz_buffer.update(); + GPU_framebuffer_bind(view_fb); + inst_.manager->submit(debug_draw_ps_, view); + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_light.hh b/source/blender/draw/engines/eevee_next/eevee_light.hh new file mode 100644 index 00000000000..9bacc180ea8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.hh @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. + * + * The culling follows the principles of Tiled Culling + Z binning from: + * "Improved Culling for Tiled and Clustered Rendering" + * by Michal Drobot + * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + * + * The culling is separated in 4 compute phases: + * - View Culling (select pass): Create a z distance and a index buffer of visible lights. + * - Light sorting: Outputs visible lights sorted by Z distance. + * - Z binning: Compute the Z bins min/max light indices. + * - Tile intersection: Fine grained 2D culling of each lights outputting a bitmap per tile. 
+ */ + +#pragma once + +#include "BLI_bitmap.h" +#include "BLI_vector.hh" +#include "DNA_light_types.h" + +#include "eevee_camera.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" +#include "eevee_sync.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +struct Light : public LightData { + public: + bool initialized = false; + bool used = false; + + public: + Light() + { + shadow_id = LIGHT_NO_SHADOW; + } + + void sync(/* ShadowModule &shadows, */ const Object *ob, float threshold); + + // void shadow_discard_safe(ShadowModule &shadows); + + void debug_draw(); + + private: + float attenuation_radius_get(const ::Light *la, float light_threshold, float light_power); + void shape_parameters_set(const ::Light *la, const float scale[3]); + float shape_power_get(const ::Light *la); + float point_power_get(const ::Light *la); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +/** + * The light module manages light data buffers and light culling system. + */ +class LightModule { + // friend ShadowModule; + + private: + /* Keep tile count reasonable for memory usage and 2D culling performance. */ + static constexpr uint max_memory_threshold = 32 * 1024 * 1024; /* 32 MiB */ + static constexpr uint max_word_count_threshold = max_memory_threshold / sizeof(uint); + static constexpr uint max_tile_count_threshold = 8192; + + Instance &inst_; + + /** Map of light objects data. Converted to flat array each frame. */ + Map<ObjectKey, Light> light_map_; + /** Flat array sent to GPU, populated from light_map_. Source buffer for light culling. */ + LightDataBuf light_buf_ = {"Lights_no_cull"}; + /** Recorded size of light_map_ (after pruning) to detect deletion. 
*/ + int64_t light_map_size_ = 0; + /** Luminous intensity to consider the light boundary at. Used for culling. */ + float light_threshold_ = 0.01f; + /** If false, will prevent all scene light from being synced. */ + bool use_scene_lights_ = false; + /** Number of sun lights synced during the last sync. Used as offset. */ + int sun_lights_len_ = 0; + int local_lights_len_ = 0; + /** Sun plus local lights count for convenience. */ + int lights_len_ = 0; + + /** + * Light Culling + */ + + /** LightData buffer used for rendering. Filled by the culling pass. */ + LightDataBuf culling_light_buf_ = {"Lights_culled"}; + /** Culling infos. */ + LightCullingDataBuf culling_data_buf_ = {"LightCull_data"}; + /** Z-distance matching the key for each visible lights. Used for sorting. */ + LightCullingZdistBuf culling_zdist_buf_ = {"LightCull_zdist"}; + /** Key buffer containing only visible lights indices. Used for sorting. */ + LightCullingKeyBuf culling_key_buf_ = {"LightCull_key"}; + /** Zbins containing min and max light index for each Z bin. */ + LightCullingZbinBuf culling_zbin_buf_ = {"LightCull_zbin"}; + /** Bitmap of lights touching each tiles. */ + LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"}; + /** Culling compute passes. */ + PassSimple culling_ps_ = {"LightCulling"}; + /** Total number of words the tile buffer needs to contain for the render resolution. */ + uint total_word_count_ = 0; + + /** Debug Culling visualization. */ + PassSimple debug_draw_ps_ = {"LightCulling.Debug"}; + + public: + LightModule(Instance &inst) : inst_(inst){}; + ~LightModule(){}; + + void begin_sync(); + void sync_light(const Object *ob, ObjectHandle &handle); + void end_sync(); + + /** + * Update acceleration structure for the given view. 
+ */ + void set_view(View &view, const int2 extent); + + void debug_draw(View &view, GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_); + DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_); +#if 0 + DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get()); + DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get()); +#endif + } + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(LIGHT_CULL_BUF_SLOT, &culling_data_buf_); + pass->bind_ssbo(LIGHT_BUF_SLOT, &culling_light_buf_); + pass->bind_ssbo(LIGHT_ZBIN_BUF_SLOT, &culling_zbin_buf_); + pass->bind_ssbo(LIGHT_TILE_BUF_SLOT, &culling_tile_buf_); + } + + private: + void culling_pass_sync(); + void debug_pass_sync(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_material.cc b/source/blender/draw/engines/eevee_next/eevee_material.cc index 1676c89d679..a92f96e8c70 100644 --- a/source/blender/draw/engines/eevee_next/eevee_material.cc +++ b/source/blender/draw/engines/eevee_next/eevee_material.cc @@ -72,10 +72,9 @@ bNodeTree *DefaultSurfaceNodeTree::nodetree_get(::Material *ma) MaterialModule::MaterialModule(Instance &inst) : inst_(inst) { { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - diffuse_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default diffuse"); - diffuse_mat->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &diffuse_mat->id, "Shader Nodetree", ntreeType_Shader->idname); diffuse_mat->use_nodes = true; /* To use the forward pipeline. 
*/ diffuse_mat->blend_method = MA_BM_BLEND; @@ -95,10 +94,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) nodeSetActive(ntree, output); } { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - glossy_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default metal"); - glossy_mat->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &glossy_mat->id, "Shader Nodetree", ntreeType_Shader->idname); glossy_mat->use_nodes = true; /* To use the forward pipeline. */ glossy_mat->blend_method = MA_BM_BLEND; @@ -120,10 +118,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) nodeSetActive(ntree, output); } { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - error_mat_ = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default error"); - error_mat_->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &error_mat_->id, "Shader Nodetree", ntreeType_Shader->idname); error_mat_->use_nodes = true; /* Use emission and output material to be compatible with both World and Material. 
*/ @@ -145,9 +142,6 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) MaterialModule::~MaterialModule() { - for (Material *mat : material_map_.values()) { - delete mat; - } BKE_id_free(nullptr, glossy_mat); BKE_id_free(nullptr, diffuse_mat); BKE_id_free(nullptr, error_mat_); @@ -157,13 +151,12 @@ void MaterialModule::begin_sync() { queued_shaders_count = 0; - for (Material *mat : material_map_.values()) { - mat->init = false; - } + material_map_.clear(); shader_map_.clear(); } -MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, +MaterialPass MaterialModule::material_pass_get(Object *ob, + ::Material *blender_mat, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type) { @@ -195,7 +188,7 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, BLI_assert(GPU_material_status(matpass.gpumat) == GPU_MAT_SUCCESS); if (GPU_material_recalc_flag_get(matpass.gpumat)) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } if ((pipeline_type == MAT_PIPE_DEFERRED) && @@ -203,35 +196,34 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, pipeline_type = MAT_PIPE_FORWARD; } - if ((pipeline_type == MAT_PIPE_FORWARD) && + if (ELEM(pipeline_type, + MAT_PIPE_FORWARD, + MAT_PIPE_FORWARD_PREPASS, + MAT_PIPE_FORWARD_PREPASS_VELOCITY) && GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT)) { - /* Transparent needs to use one shgroup per object to support reordering. */ - matpass.shgrp = inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type); + /* Transparent pass is generated later. */ + matpass.sub_pass = nullptr; } else { ShaderKey shader_key(matpass.gpumat, geometry_type, pipeline_type); - auto add_cb = [&]() -> DRWShadingGroup * { - /* First time encountering this shader. Create a shading group. 
*/ - return inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type); - }; - DRWShadingGroup *grp = shader_map_.lookup_or_add_cb(shader_key, add_cb); - - if (grp != nullptr) { - /* Shading group for this shader already exists. Create a sub one for this material. */ - /* IMPORTANT: We always create a subgroup so that all subgroups are inserted after the - * first "empty" shgroup. This avoids messing the order of subgroups when there is more - * nested subgroup (i.e: hair drawing). */ - /* TODO(@fclem): Remove material resource binding from the first group creation. */ - matpass.shgrp = DRW_shgroup_create_sub(grp); - DRW_shgroup_add_material_resources(matpass.shgrp, matpass.gpumat); + PassMain::Sub *shader_sub = shader_map_.lookup_or_add_cb(shader_key, [&]() { + /* First time encountering this shader. Create a sub that will contain materials using it. */ + return inst_.pipelines.material_add(ob, blender_mat, matpass.gpumat, pipeline_type); + }); + + if (shader_sub != nullptr) { + /* Create a sub for this material as `shader_sub` is for sharing shader between materials. */ + matpass.sub_pass = &shader_sub->sub(GPU_material_get_name(matpass.gpumat)); + matpass.sub_pass->material_set(*inst_.manager, matpass.gpumat); } } return matpass; } -Material &MaterialModule::material_sync(::Material *blender_mat, +Material &MaterialModule::material_sync(Object *ob, + ::Material *blender_mat, eMaterialGeometry geometry_type, bool has_motion) { @@ -249,27 +241,32 @@ Material &MaterialModule::material_sync(::Material *blender_mat, MaterialKey material_key(blender_mat, geometry_type, surface_pipe); - /* TODO: allocate in blocks to avoid memory fragmentation. */ - auto add_cb = [&]() { return new Material(); }; - Material &mat = *material_map_.lookup_or_add_cb(material_key, add_cb); - - /* Forward pipeline needs to use one shgroup per object. 
*/ - if (mat.init == false || (surface_pipe == MAT_PIPE_FORWARD)) { - mat.init = true; + Material &mat = material_map_.lookup_or_add_cb(material_key, [&]() { + Material mat; /* Order is important for transparent. */ - mat.prepass = material_pass_get(blender_mat, prepass_pipe, geometry_type); - mat.shading = material_pass_get(blender_mat, surface_pipe, geometry_type); + mat.prepass = material_pass_get(ob, blender_mat, prepass_pipe, geometry_type); + mat.shading = material_pass_get(ob, blender_mat, surface_pipe, geometry_type); if (blender_mat->blend_shadow == MA_BS_NONE) { mat.shadow = MaterialPass(); } else { - mat.shadow = material_pass_get(blender_mat, MAT_PIPE_SHADOW, geometry_type); + mat.shadow = material_pass_get(ob, blender_mat, MAT_PIPE_SHADOW, geometry_type); } - mat.is_alpha_blend_transparent = (blender_mat->blend_method == MA_BM_BLEND) && - GPU_material_flag_get(mat.prepass.gpumat, + GPU_material_flag_get(mat.shading.gpumat, GPU_MATFLAG_TRANSPARENT); + return mat; + }); + + if (mat.is_alpha_blend_transparent) { + /* Transparent needs to use one sub pass per object to support reordering. + * NOTE: Pre-pass needs to be created first in order to be sorted first. 
*/ + mat.prepass.sub_pass = inst_.pipelines.forward.prepass_transparent_add( + ob, blender_mat, mat.shading.gpumat); + mat.shading.sub_pass = inst_.pipelines.forward.material_transparent_add( + ob, blender_mat, mat.shading.gpumat); } + return mat; } @@ -297,7 +294,7 @@ MaterialArray &MaterialModule::material_array_get(Object *ob, bool has_motion) for (auto i : IndexRange(materials_len)) { ::Material *blender_mat = material_from_slot(ob, i); - Material &mat = material_sync(blender_mat, to_material_geometry(ob), has_motion); + Material &mat = material_sync(ob, blender_mat, to_material_geometry(ob), has_motion); material_array_.materials.append(&mat); material_array_.gpu_materials.append(mat.shading.gpumat); } @@ -310,7 +307,7 @@ Material &MaterialModule::material_get(Object *ob, eMaterialGeometry geometry_type) { ::Material *blender_mat = material_from_slot(ob, mat_nr); - Material &mat = material_sync(blender_mat, geometry_type, has_motion); + Material &mat = material_sync(ob, blender_mat, geometry_type, has_motion); return mat; } diff --git a/source/blender/draw/engines/eevee_next/eevee_material.hh b/source/blender/draw/engines/eevee_next/eevee_material.hh index 23165a741b9..ad0c293926b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_material.hh +++ b/source/blender/draw/engines/eevee_next/eevee_material.hh @@ -203,12 +203,11 @@ class DefaultSurfaceNodeTree { * \{ */ struct MaterialPass { - GPUMaterial *gpumat = nullptr; - DRWShadingGroup *shgrp = nullptr; + GPUMaterial *gpumat; + PassMain::Sub *sub_pass; }; struct Material { - bool init = false; bool is_alpha_blend_transparent; MaterialPass shadow, shading, prepass; }; @@ -228,8 +227,8 @@ class MaterialModule { private: Instance &inst_; - Map<MaterialKey, Material *> material_map_; - Map<ShaderKey, DRWShadingGroup *> shader_map_; + Map<MaterialKey, Material> material_map_; + Map<ShaderKey, PassMain::Sub *> shader_map_; MaterialArray material_array_; @@ -254,13 +253,15 @@ class MaterialModule { Material 
&material_get(Object *ob, bool has_motion, int mat_nr, eMaterialGeometry geometry_type); private: - Material &material_sync(::Material *blender_mat, + Material &material_sync(Object *ob, + ::Material *blender_mat, eMaterialGeometry geometry_type, bool has_motion); /** Return correct material or empty default material if slot is empty. */ ::Material *material_from_slot(Object *ob, int slot); - MaterialPass material_pass_get(::Material *blender_mat, + MaterialPass material_pass_get(Object *ob, + ::Material *blender_mat, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type); }; diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc new file mode 100644 index 00000000000..f68abafa3d4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + */ + +// #include "BLI_map.hh" +#include "DEG_depsgraph_query.h" + +#include "eevee_instance.hh" +#include "eevee_motion_blur.hh" +// #include "eevee_sampling.hh" +// #include "eevee_shader_shared.hh" +// #include "eevee_velocity.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name MotionBlurModule + * + * \{ */ + +void MotionBlurModule::init() +{ + const Scene *scene = inst_.scene; + + enabled_ = (scene->eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) != 0; + + if (!enabled_) { + motion_blur_fx_enabled_ = false; + return; + } + + /* Take into account the steps needed for fx motion blur. 
*/ + int steps_count = max_ii(1, scene->eevee.motion_blur_steps) * 2 + 1; + + time_steps_.resize(steps_count); + + initial_frame_ = scene->r.cfra; + initial_subframe_ = scene->r.subframe; + frame_time_ = initial_frame_ + initial_subframe_; + shutter_position_ = scene->eevee.motion_blur_position; + shutter_time_ = scene->eevee.motion_blur_shutter; + + data_.depth_scale = scene->eevee.motion_blur_depth_scale; + motion_blur_fx_enabled_ = true; /* TODO(fclem): UI option. */ + + /* Viewport stops here. We only do Post-FX motion blur. */ + if (inst_.is_viewport()) { + enabled_ = false; + return; + } + + /* Without this there is the possibility of the curve table not being allocated. */ + BKE_curvemapping_changed((struct CurveMapping *)&scene->r.mblur_shutter_curve, false); + + Vector<float> cdf(CM_TABLE); + Sampling::cdf_from_curvemapping(scene->r.mblur_shutter_curve, cdf); + Sampling::cdf_invert(cdf, time_steps_); + + for (float &time : time_steps_) { + time = this->shutter_time_to_scene_time(time); + } + + step_id_ = 1; + + if (motion_blur_fx_enabled_) { + /* A bit weird but we have to sync the first 2 steps here because the step() + * function is only called after rendering a sample. */ + inst_.velocity.step_sync(STEP_PREVIOUS, time_steps_[0]); + inst_.velocity.step_sync(STEP_NEXT, time_steps_[2]); + } + inst_.set_time(time_steps_[1]); +} + +/* Runs after rendering a sample. */ +void MotionBlurModule::step() +{ + if (!enabled_) { + return; + } + + if (inst_.sampling.finished()) { + /* Restore original frame number. This is because the render pipeline expects it. */ + RE_engine_frame_set(inst_.render, initial_frame_, initial_subframe_); + } + else if (inst_.sampling.do_render_sync()) { + /* Time to change motion step. 
*/ + BLI_assert(time_steps_.size() > step_id_ + 2); + step_id_ += 2; + + if (motion_blur_fx_enabled_) { + inst_.velocity.step_swap(); + inst_.velocity.step_sync(eVelocityStep::STEP_NEXT, time_steps_[step_id_ + 1]); + } + inst_.set_time(time_steps_[step_id_]); + } +} + +float MotionBlurModule::shutter_time_to_scene_time(float time) +{ + switch (shutter_position_) { + case SCE_EEVEE_MB_START: + /* No offset. */ + break; + case SCE_EEVEE_MB_CENTER: + time -= 0.5f; + break; + case SCE_EEVEE_MB_END: + time -= 1.0; + break; + default: + BLI_assert(!"Invalid motion blur position enum!"); + break; + } + time *= shutter_time_; + time += frame_time_; + return time; +} + +void MotionBlurModule::sync() +{ + /* Disable motion blur in viewport when changing camera projection type. + * Avoids really high velocities. */ + if (inst_.velocity.camera_changed_projection()) { + motion_blur_fx_enabled_ = false; + } + + if (!motion_blur_fx_enabled_) { + return; + } + + eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT; + RenderBuffers &render_buffers = inst_.render_buffers; + + motion_blur_ps_.init(); + inst_.velocity.bind_resources(&motion_blur_ps_); + inst_.sampling.bind_resources(&motion_blur_ps_); + { + /* Create max velocity tiles. */ + PassSimple::Sub &sub = motion_blur_ps_.sub("TilesFlatten"); + eShaderType shader = (inst_.is_viewport()) ? MOTION_BLUR_TILE_FLATTEN_VIEWPORT : + MOTION_BLUR_TILE_FLATTEN_RENDER; + sub.shader_set(inst_.shaders.static_shader_get(shader)); + sub.bind_ubo("motion_blur_buf", data_); + sub.bind_texture("depth_tx", &render_buffers.depth_tx); + sub.bind_image("velocity_img", &render_buffers.vector_tx); + sub.bind_image("out_tiles_img", &tiles_tx_); + sub.dispatch(&dispatch_flatten_size_); + sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH); + } + { + /* Expand max velocity tiles by spreading them in their neighborhood. 
*/ + PassSimple::Sub &sub = motion_blur_ps_.sub("TilesDilate"); + sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_TILE_DILATE)); + sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_); + sub.bind_image("in_tiles_img", &tiles_tx_); + sub.dispatch(&dispatch_dilate_size_); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + /* Do the motion blur gather algorithm. */ + PassSimple::Sub &sub = motion_blur_ps_.sub("ConvolveGather"); + sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_GATHER)); + sub.bind_ubo("motion_blur_buf", data_); + sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_); + sub.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + sub.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter); + sub.bind_texture("in_color_tx", &input_color_tx_, no_filter); + sub.bind_image("in_tiles_img", &tiles_tx_); + sub.bind_image("out_color_img", &output_color_tx_); + + sub.dispatch(&dispatch_gather_size_); + sub.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void MotionBlurModule::render(View &view, GPUTexture **input_tx, GPUTexture **output_tx) +{ + if (!motion_blur_fx_enabled_) { + return; + } + + const Texture &depth_tx = inst_.render_buffers.depth_tx; + + int2 extent = {depth_tx.width(), depth_tx.height()}; + int2 tiles_extent = math::divide_ceil(extent, int2(MOTION_BLUR_TILE_SIZE)); + + if (inst_.is_viewport()) { + float frame_delta = fabsf(inst_.velocity.step_time_delta_get(STEP_PREVIOUS, STEP_CURRENT)); + /* Avoid highly disturbing blurs, during navigation with high shutter time. */ + if (frame_delta > 0.0f && !DRW_state_is_navigating()) { + /* Rescale motion blur intensity to be shutter time relative and avoid long streak when we + * have frame skipping. Always try to stick to what the render frame would look like. */ + data_.motion_scale = float2(shutter_time_ / frame_delta); + } + else { + /* There is no time change. Motion only comes from viewport navigation and object transform. 
+ * Apply motion blur as smoothing and only blur towards last frame. */ + data_.motion_scale = float2(1.0f, 0.0f); + + if (was_navigating_ != DRW_state_is_navigating()) { + /* Special case for navigation events that only last for one frame (for instance mouse + * scroll for zooming). For this case we have to wait for the next frame before enabling + * the navigation motion blur. */ + was_navigating_ = DRW_state_is_navigating(); + return; + } + } + was_navigating_ = DRW_state_is_navigating(); + + /* Change texture swizzling to avoid complexity in gather pass shader. */ + GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgrg"); + } + else { + data_.motion_scale = float2(1.0f); + } + /* Second motion vector is stored inverted. */ + data_.motion_scale.y = -data_.motion_scale.y; + data_.target_size_inv = 1.0f / float2(extent); + data_.push_update(); + + input_color_tx_ = *input_tx; + output_color_tx_ = *output_tx; + + dispatch_flatten_size_ = int3(tiles_extent, 1); + dispatch_dilate_size_ = int3(math::divide_ceil(tiles_extent, int2(MOTION_BLUR_GROUP_SIZE)), 1); + dispatch_gather_size_ = int3(math::divide_ceil(extent, int2(MOTION_BLUR_GROUP_SIZE)), 1); + + DRW_stats_group_start("Motion Blur"); + + tiles_tx_.acquire(tiles_extent, GPU_RGBA16F); + + GPU_storagebuf_clear_to_zero(tile_indirection_buf_); + + inst_.manager->submit(motion_blur_ps_, view); + + tiles_tx_.release(); + + DRW_stats_group_end(); + + if (inst_.is_viewport()) { + /* Reset swizzle since this texture might be reused in other places. */ + GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgba"); + } + + /* Swap buffers so that next effect has the right input. 
*/ + *input_tx = output_color_tx_; + *output_tx = input_color_tx_; +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh new file mode 100644 index 00000000000..056c2e323d5 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Motion blur is done by accumulating scene samples over shutter time. + * Since the number of step is discrete, quite low, and not per pixel randomized, + * we couple this with a post processing motion blur. + * + * The post-fx motion blur is done in two directions, from the previous step and to the next. + * + * For a scene with 3 motion steps, a flat shutter curve and shutter time of 2 frame + * centered on frame we have: + * + * |--------------------|--------------------| + * -1 0 1 Frames + * + * |-------------|-------------|-------------| + * 1 2 3 Motion steps + * + * |------|------|------|------|------|------| + * 0 1 2 4 5 6 7 Time Steps + * + * |-------------| One motion step blurs this range. + * -1 | +1 Objects and geometry steps are recorded here. + * 0 Scene is rendered here. + * + * Since motion step N and N+1 share one time step we reuse it to avoid an extra scene evaluation. + * + * Note that we have to evaluate -1 and +1 time steps before rendering so eval order is -1, +1, 0. + * This is because all GPUBatches from the DRWCache are being free when changing a frame. + * + * For viewport, we only have the current and previous step data to work with. So we center the + * blur on the current frame and extrapolate the motion. 
+ * + * The Post-FX motion blur is based on: + * "A Fast and Stable Feature-Aware Motion Blur Filter" + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + */ + +#pragma once + +#include "BLI_map.hh" +#include "DEG_depsgraph_query.h" + +#include "eevee_sampling.hh" +#include "eevee_shader_shared.hh" +#include "eevee_velocity.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name MotionBlur + * + * \{ */ + +/** + * Manages time-steps evaluations and accumulation Motion blur. + * Also handles Post process motion blur. + */ +class MotionBlurModule { + private: + Instance &inst_; + + /** + * Array containing all steps (in scene time) we need to evaluate (not render). + * Only odd steps are rendered. The even ones are evaluated for fx motion blur. + */ + Vector<float> time_steps_; + + /** Copy of input frame and sub-frame to restore after render. */ + int initial_frame_; + float initial_subframe_; + /** Time of the frame we are rendering. */ + float frame_time_; + /** Enum controlling when the shutter opens. See SceneEEVEE.motion_blur_position. */ + int shutter_position_; + /** Time in scene frame the shutter is open. Controls the amount of blur. */ + float shutter_time_; + + /** True if motion blur is enabled as a module. */ + bool enabled_ = false; + /** True if motion blur post-fx is enabled. */ + float motion_blur_fx_enabled_ = false; + /** True if last viewport redraw state was already in navigation state. */ + bool was_navigating_ = false; + + int step_id_ = 0; + + /** Velocity tiles used to guide and speedup the gather pass. */ + TextureFromPool tiles_tx_; + + GPUTexture *input_color_tx_ = nullptr; + GPUTexture *output_color_tx_ = nullptr; + + PassSimple motion_blur_ps_ = {"MotionBlur"}; + + MotionBlurTileIndirectionBuf tile_indirection_buf_; + MotionBlurDataBuf data_; + /** Dispatch size for full-screen passes. 
*/ + int3 dispatch_flatten_size_ = int3(0); + int3 dispatch_dilate_size_ = int3(0); + int3 dispatch_gather_size_ = int3(0); + + public: + MotionBlurModule(Instance &inst) : inst_(inst){}; + ~MotionBlurModule(){}; + + void init(); + + void step(); + + void sync(); + + bool postfx_enabled() const + { + return motion_blur_fx_enabled_; + } + + void render(View &view, GPUTexture **input_tx, GPUTexture **output_tx); + + private: + float shutter_time_to_scene_time(float time); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc index 33853eba06c..33978518ffc 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc @@ -24,21 +24,36 @@ namespace blender::eevee { void WorldPipeline::sync(GPUMaterial *gpumat) { - DRWState state = DRW_STATE_WRITE_COLOR; - world_ps_ = DRW_pass_create("World", state); - - /* Push a matrix at the same location as the camera. */ - float4x4 camera_mat = float4x4::identity(); - // copy_v3_v3(camera_mat[3], inst_.camera.data_get().viewinv[3]); - - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, world_ps_); - DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - DRW_shgroup_call_obmat(grp, DRW_cache_fullscreen_quad_get(), camera_mat.ptr()); + Manager &manager = *inst_.manager; + RenderBuffers &rbufs = inst_.render_buffers; + + ResourceHandle handle = manager.resource_handle(float4x4::identity().ptr()); + + world_ps_.init(); + world_ps_.state_set(DRW_STATE_WRITE_COLOR); + world_ps_.material_set(manager, gpumat); + world_ps_.push_constant("world_opacity_fade", inst_.film.background_opacity_get()); + world_ps_.bind_texture("utility_tx", inst_.pipelines.utility_tx); + /* AOVs. 
*/ + world_ps_.bind_image("aov_color_img", &rbufs.aov_color_tx); + world_ps_.bind_image("aov_value_img", &rbufs.aov_value_tx); + world_ps_.bind_ssbo("aov_buf", &inst_.film.aovs_info); + /* RenderPasses. Cleared by background (even if bad practice). */ + world_ps_.bind_image("rp_normal_img", &rbufs.normal_tx); + world_ps_.bind_image("rp_light_img", &rbufs.light_tx); + world_ps_.bind_image("rp_diffuse_color_img", &rbufs.diffuse_color_tx); + world_ps_.bind_image("rp_specular_color_img", &rbufs.specular_color_tx); + world_ps_.bind_image("rp_emission_img", &rbufs.emission_tx); + world_ps_.bind_image("rp_cryptomatte_img", &rbufs.cryptomatte_tx); + + world_ps_.draw(DRW_cache_fullscreen_quad_get(), handle); + /* To allow opaque pass rendering over it. */ + world_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); } -void WorldPipeline::render() +void WorldPipeline::render(View &view) { - DRW_draw_pass(world_ps_); + inst_.manager->submit(world_ps_, view); } /** \} */ @@ -51,182 +66,167 @@ void WorldPipeline::render() void ForwardPipeline::sync() { + camera_forward_ = inst_.camera.forward(); + + DRWState state_depth_only = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS; + DRWState state_depth_color = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS | + DRW_STATE_WRITE_COLOR; { - DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS; - prepass_ps_ = DRW_pass_create("Forward.Opaque.Prepass", state); - prepass_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity", - state | DRW_STATE_WRITE_COLOR); + prepass_ps_.init(); - state |= DRW_STATE_CULL_BACK; - prepass_culled_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Culled", state); - prepass_culled_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity", - state | DRW_STATE_WRITE_COLOR); + { + /* Common resources. 
*/ - DRW_pass_link(prepass_ps_, prepass_velocity_ps_); - DRW_pass_link(prepass_velocity_ps_, prepass_culled_ps_); - DRW_pass_link(prepass_culled_ps_, prepass_culled_velocity_ps_); - } - { - DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL; - opaque_ps_ = DRW_pass_create("Forward.Opaque", state); + /* Textures. */ + prepass_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); - state |= DRW_STATE_CULL_BACK; - opaque_culled_ps_ = DRW_pass_create("Forward.Opaque.Culled", state); + inst_.velocity.bind_resources(&prepass_ps_); + inst_.sampling.bind_resources(&prepass_ps_); + } + + prepass_double_sided_static_ps_ = &prepass_ps_.sub("DoubleSided.Static"); + prepass_double_sided_static_ps_->state_set(state_depth_only); + + prepass_single_sided_static_ps_ = &prepass_ps_.sub("SingleSided.Static"); + prepass_single_sided_static_ps_->state_set(state_depth_only | DRW_STATE_CULL_BACK); - DRW_pass_link(opaque_ps_, opaque_culled_ps_); + prepass_double_sided_moving_ps_ = &prepass_ps_.sub("DoubleSided.Moving"); + prepass_double_sided_moving_ps_->state_set(state_depth_color); + + prepass_single_sided_moving_ps_ = &prepass_ps_.sub("SingleSided.Moving"); + prepass_single_sided_moving_ps_->state_set(state_depth_color | DRW_STATE_CULL_BACK); } { - DRWState state = DRW_STATE_DEPTH_LESS_EQUAL; - transparent_ps_ = DRW_pass_create("Forward.Transparent", state); + opaque_ps_.init(); + + { + /* Common resources. */ + + /* RenderPasses. */ + opaque_ps_.bind_image(RBUFS_NORMAL_SLOT, &inst_.render_buffers.normal_tx); + opaque_ps_.bind_image(RBUFS_LIGHT_SLOT, &inst_.render_buffers.light_tx); + opaque_ps_.bind_image(RBUFS_DIFF_COLOR_SLOT, &inst_.render_buffers.diffuse_color_tx); + opaque_ps_.bind_image(RBUFS_SPEC_COLOR_SLOT, &inst_.render_buffers.specular_color_tx); + opaque_ps_.bind_image(RBUFS_EMISSION_SLOT, &inst_.render_buffers.emission_tx); + /* AOVs. 
*/ + opaque_ps_.bind_image(RBUFS_AOV_COLOR_SLOT, &inst_.render_buffers.aov_color_tx); + opaque_ps_.bind_image(RBUFS_AOV_VALUE_SLOT, &inst_.render_buffers.aov_value_tx); + /* Cryptomatte. */ + opaque_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx); + /* Storage Buf. */ + opaque_ps_.bind_ssbo(RBUFS_AOV_BUF_SLOT, &inst_.film.aovs_info); + /* Textures. */ + opaque_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); + + inst_.lights.bind_resources(&opaque_ps_); + inst_.sampling.bind_resources(&opaque_ps_); + inst_.cryptomatte.bind_resources(&opaque_ps_); + } + + opaque_single_sided_ps_ = &opaque_ps_.sub("SingleSided"); + opaque_single_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | + DRW_STATE_CULL_BACK); + + opaque_double_sided_ps_ = &opaque_ps_.sub("DoubleSided"); + opaque_double_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL); } -} + { + transparent_ps_.init(); + /* Workaround limitation of PassSortable. Use dummy pass that will be sorted first in all + * circumstances. */ + PassMain::Sub &sub = transparent_ps_.sub("ResourceBind", -FLT_MAX); -DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat) -{ - DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? 
opaque_culled_ps_ : opaque_ps_; - // LightModule &lights = inst_.lights; - // LightProbeModule &lightprobes = inst_.lightprobes; - // RaytracingModule &raytracing = inst_.raytracing; - // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - // lights.shgroup_resources(grp); - // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); - // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); - // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); - // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - /* TODO(fclem): Make this only needed if material uses it ... somehow. */ - // if (true) { - // DRW_shgroup_uniform_texture_ref( - // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get()); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data); - // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data); - // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data); - // DRW_shgroup_uniform_texture_ref_ex(grp, "radiance_tx", &input_screen_radiance_tx_, - // no_interp); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get()); - // } - return grp; -} + /* Common resources. */ -DRWShadingGroup *ForwardPipeline::prepass_opaque_add(::Material *blender_mat, - GPUMaterial *gpumat, - bool has_motion) -{ - DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? - (has_motion ? 
prepass_culled_velocity_ps_ : prepass_culled_ps_) : - (has_motion ? prepass_velocity_ps_ : prepass_ps_); - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - if (has_motion) { - inst_.velocity.bind_resources(grp); + /* Textures. */ + sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); + + inst_.lights.bind_resources(&sub); + inst_.sampling.bind_resources(&sub); } - return grp; } -DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_mat, - GPUMaterial *gpumat) +PassMain::Sub *ForwardPipeline::prepass_opaque_add(::Material *blender_mat, + GPUMaterial *gpumat, + bool has_motion) { - // LightModule &lights = inst_.lights; - // LightProbeModule &lightprobes = inst_.lightprobes; - // RaytracingModule &raytracing = inst_.raytracing; - // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - // lights.shgroup_resources(grp); - // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); - // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); - // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); - // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - // DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - /* TODO(fclem): Make this only needed if material uses it ... somehow. 
*/ - // if (true) { - // DRW_shgroup_uniform_texture_ref( - // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get()); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data); - // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data); - // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data); - // DRW_shgroup_uniform_texture_ref_ex( - // grp, "rt_radiance_tx", &input_screen_radiance_tx_, no_interp); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get()); - // } + PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? + (has_motion ? prepass_single_sided_moving_ps_ : + prepass_single_sided_static_ps_) : + (has_motion ? prepass_double_sided_moving_ps_ : + prepass_double_sided_static_ps_); + return &pass->sub(GPU_material_get_name(gpumat)); +} - DRWState state_disable = DRW_STATE_WRITE_DEPTH; - DRWState state_enable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; - if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) { - state_enable |= DRW_STATE_CULL_BACK; - } - DRW_shgroup_state_disable(grp, state_disable); - DRW_shgroup_state_enable(grp, state_enable); - return grp; +PassMain::Sub *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat) +{ + PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? 
opaque_single_sided_ps_ : + opaque_double_sided_ps_; + return &pass->sub(GPU_material_get_name(gpumat)); } -DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_mat, - GPUMaterial *gpumat) +PassMain::Sub *ForwardPipeline::prepass_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat) { if ((blender_mat->blend_flag & MA_BL_HIDE_BACKFACE) == 0) { return nullptr; } + DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS_EQUAL; + if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) { + state |= DRW_STATE_CULL_BACK; + } + float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_); + PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value); + pass->state_set(state); + pass->material_set(*inst_.manager, gpumat); + return pass; +} - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - - DRWState state_disable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; - DRWState state_enable = DRW_STATE_WRITE_DEPTH; - if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) { - state_enable |= DRW_STATE_CULL_BACK; +PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat) +{ + DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL; + if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) { + state |= DRW_STATE_CULL_BACK; } - DRW_shgroup_state_disable(grp, state_disable); - DRW_shgroup_state_enable(grp, state_enable); - return grp; + float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_); + PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value); + pass->state_set(state); + pass->material_set(*inst_.manager, gpumat); + return pass; } -void ForwardPipeline::render(const DRWView *view, +void ForwardPipeline::render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture 
*UNUSED(combined_tx)) { - UNUSED_VARS(view, depth_tx, prepass_fb, combined_fb); - // HiZBuffer &hiz = inst_.hiz_front; + UNUSED_VARS(view); - DRW_stats_group_start("ForwardOpaque"); + DRW_stats_group_start("Forward.Opaque"); GPU_framebuffer_bind(prepass_fb); - DRW_draw_pass(prepass_ps_); + inst_.manager->submit(prepass_ps_, view); - // hiz.set_dirty(); + // if (!DRW_pass_is_empty(prepass_ps_)) { + inst_.hiz_buffer.set_dirty(); + // } // if (inst_.raytracing.enabled()) { // rt_buffer.radiance_copy(combined_tx); - // hiz.update(depth_tx); + // inst_.hiz_buffer.update(); // } // inst_.shadows.set_view(view, depth_tx); GPU_framebuffer_bind(combined_fb); - DRW_draw_pass(opaque_ps_); + inst_.manager->submit(opaque_ps_, view); DRW_stats_group_end(); - DRW_stats_group_start("ForwardTransparent"); - /* TODO(fclem) This is suboptimal. We could sort during sync. */ - /* FIXME(fclem) This wont work for panoramic, where we need - * to sort by distance to camera, not by z. */ - DRW_pass_sort_shgroup_z(transparent_ps_); - DRW_draw_pass(transparent_ps_); - DRW_stats_group_end(); + inst_.manager->submit(transparent_ps_, view); // if (inst_.raytracing.enabled()) { // gbuffer.ray_radiance_tx.release(); diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh index 3bdc718767b..0614a963dec 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh @@ -13,6 +13,7 @@ #pragma once #include "DRW_render.h" +#include "draw_shader_shared.h" /* TODO(fclem): Move it to GPU/DRAW. 
*/ #include "../eevee/eevee_lut.h" @@ -31,13 +32,13 @@ class WorldPipeline { private: Instance &inst_; - DRWPass *world_ps_ = nullptr; + PassSimple world_ps_ = {"World.Background"}; public: WorldPipeline(Instance &inst) : inst_(inst){}; void sync(GPUMaterial *gpumat); - void render(); + void render(View &view); }; /** \} */ @@ -52,13 +53,18 @@ class ForwardPipeline { private: Instance &inst_; - DRWPass *prepass_ps_ = nullptr; - DRWPass *prepass_velocity_ps_ = nullptr; - DRWPass *prepass_culled_ps_ = nullptr; - DRWPass *prepass_culled_velocity_ps_ = nullptr; - DRWPass *opaque_ps_ = nullptr; - DRWPass *opaque_culled_ps_ = nullptr; - DRWPass *transparent_ps_ = nullptr; + PassMain prepass_ps_ = {"Prepass"}; + PassMain::Sub *prepass_single_sided_static_ps_ = nullptr; + PassMain::Sub *prepass_single_sided_moving_ps_ = nullptr; + PassMain::Sub *prepass_double_sided_static_ps_ = nullptr; + PassMain::Sub *prepass_double_sided_moving_ps_ = nullptr; + + PassMain opaque_ps_ = {"Shading"}; + PassMain::Sub *opaque_single_sided_ps_ = nullptr; + PassMain::Sub *opaque_double_sided_ps_ = nullptr; + + PassSortable transparent_ps_ = {"Forward.Transparent"}; + float3 camera_forward_; // GPUTexture *input_screen_radiance_tx_ = nullptr; @@ -67,31 +73,19 @@ class ForwardPipeline { void sync(); - DRWShadingGroup *material_add(::Material *blender_mat, GPUMaterial *gpumat) - { - return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ? - material_transparent_add(blender_mat, gpumat) : - material_opaque_add(blender_mat, gpumat); - } + PassMain::Sub *prepass_opaque_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion); + PassMain::Sub *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion) - { - return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ? 
- prepass_transparent_add(blender_mat, gpumat) : - prepass_opaque_add(blender_mat, gpumat, has_motion); - } - - DRWShadingGroup *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_opaque_add(::Material *blender_mat, - GPUMaterial *gpumat, - bool has_motion); - DRWShadingGroup *material_transparent_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_transparent_add(::Material *blender_mat, GPUMaterial *gpumat); + PassMain::Sub *prepass_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat); + PassMain::Sub *material_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat); - void render(const DRWView *view, + void render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture *combined_tx); }; @@ -193,26 +187,36 @@ class PipelineModule { // velocity.sync(); } - DRWShadingGroup *material_add(::Material *blender_mat, - GPUMaterial *gpumat, - eMaterialPipeline pipeline_type) + PassMain::Sub *material_add(Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat, + eMaterialPipeline pipeline_type) { switch (pipeline_type) { case MAT_PIPE_DEFERRED_PREPASS: // return deferred.prepass_add(blender_mat, gpumat, false); - break; + case MAT_PIPE_FORWARD_PREPASS: + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.prepass_transparent_add(ob, blender_mat, gpumat); + } + return forward.prepass_opaque_add(blender_mat, gpumat, false); + case MAT_PIPE_DEFERRED_PREPASS_VELOCITY: // return deferred.prepass_add(blender_mat, gpumat, true); - break; - case MAT_PIPE_FORWARD_PREPASS: - return forward.prepass_add(blender_mat, gpumat, false); case MAT_PIPE_FORWARD_PREPASS_VELOCITY: - return forward.prepass_add(blender_mat, gpumat, true); + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.prepass_transparent_add(ob, blender_mat, gpumat); + } + return 
forward.prepass_opaque_add(blender_mat, gpumat, true); + case MAT_PIPE_DEFERRED: // return deferred.material_add(blender_mat, gpumat); - break; case MAT_PIPE_FORWARD: - return forward.material_add(blender_mat, gpumat); + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.material_transparent_add(ob, blender_mat, gpumat); + } + return forward.material_opaque_add(blender_mat, gpumat); + case MAT_PIPE_VOLUME: /* TODO(fclem) volume pass. */ return nullptr; diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc new file mode 100644 index 00000000000..8e36e1d071c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * A film is a fullscreen buffer (usually at output extent) + * that will be able to accumulate sample in any distorted camera_type + * using a pixel filter. + * + * Input needs to be jittered so that the filter converges to the right result. + */ + +#include "BLI_rect.h" + +#include "GPU_framebuffer.h" +#include "GPU_texture.h" + +#include "DRW_render.h" + +#include "eevee_film.hh" +#include "eevee_instance.hh" + +namespace blender::eevee { + +void RenderBuffers::acquire(int2 extent) +{ + const eViewLayerEEVEEPassType enabled_passes = inst_.film.enabled_passes_get(); + + auto pass_extent = [&](eViewLayerEEVEEPassType pass_bit) -> int2 { + /* Use dummy texture for disabled passes. Allows correct bindings. */ + return (enabled_passes & pass_bit) ? extent : int2(1); + }; + + eGPUTextureFormat color_format = GPU_RGBA16F; + eGPUTextureFormat float_format = GPU_R16F; + + /* Depth and combined are always needed. 
*/ + depth_tx.acquire(extent, GPU_DEPTH24_STENCIL8); + combined_tx.acquire(extent, color_format); + + bool do_vector_render_pass = (enabled_passes & EEVEE_RENDER_PASS_VECTOR) || + (inst_.motion_blur.postfx_enabled() && !inst_.is_viewport()); + uint32_t max_light_color_layer = max_ii(enabled_passes & EEVEE_RENDER_PASS_DIFFUSE_LIGHT ? + (int)RENDER_PASS_LAYER_DIFFUSE_LIGHT : + -1, + enabled_passes & EEVEE_RENDER_PASS_SPECULAR_LIGHT ? + (int)RENDER_PASS_LAYER_SPECULAR_LIGHT : + -1) + + 1; + /* Only RG16F when only doing only reprojection or motion blur. */ + eGPUTextureFormat vector_format = do_vector_render_pass ? GPU_RGBA16F : GPU_RG16F; + /* TODO(fclem): Make vector pass allocation optional if no TAA or motion blur is needed. */ + vector_tx.acquire(extent, vector_format); + + normal_tx.acquire(pass_extent(EEVEE_RENDER_PASS_NORMAL), color_format); + diffuse_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_DIFFUSE_COLOR), color_format); + specular_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SPECULAR_COLOR), color_format); + volume_light_tx.acquire(pass_extent(EEVEE_RENDER_PASS_VOLUME_LIGHT), color_format); + emission_tx.acquire(pass_extent(EEVEE_RENDER_PASS_EMIT), color_format); + environment_tx.acquire(pass_extent(EEVEE_RENDER_PASS_ENVIRONMENT), color_format); + shadow_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SHADOW), float_format); + ambient_occlusion_tx.acquire(pass_extent(EEVEE_RENDER_PASS_AO), float_format); + + light_tx.ensure_2d_array(color_format, + max_light_color_layer > 0 ? extent : int2(1), + max_ii(1, max_light_color_layer)); + + const AOVsInfoData &aovs = inst_.film.aovs_info; + aov_color_tx.ensure_2d_array( + color_format, (aovs.color_len > 0) ? extent : int2(1), max_ii(1, aovs.color_len)); + aov_value_tx.ensure_2d_array( + float_format, (aovs.value_len > 0) ? 
extent : int2(1), max_ii(1, aovs.value_len)); + + eGPUTextureFormat cryptomatte_format = GPU_R32F; + const int cryptomatte_layer_len = inst_.film.cryptomatte_layer_max_get(); + if (cryptomatte_layer_len == 2) { + cryptomatte_format = GPU_RG32F; + } + else if (cryptomatte_layer_len == 3) { + cryptomatte_format = GPU_RGBA32F; + } + cryptomatte_tx.acquire( + pass_extent(static_cast<eViewLayerEEVEEPassType>(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET | + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)), + cryptomatte_format); +} + +void RenderBuffers::release() +{ + depth_tx.release(); + combined_tx.release(); + + normal_tx.release(); + vector_tx.release(); + diffuse_color_tx.release(); + specular_color_tx.release(); + volume_light_tx.release(); + emission_tx.release(); + environment_tx.release(); + shadow_tx.release(); + ambient_occlusion_tx.release(); + cryptomatte_tx.release(); +} + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh new file mode 100644 index 00000000000..ae5d7fbae5c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Render buffers are textures that are filled during a view rendering. + * Their content is then added to the accumulation buffers of the film class. + * They are short lived and can be reused when doing multi view rendering. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +class RenderBuffers { + public: + TextureFromPool depth_tx; + TextureFromPool combined_tx; + + // TextureFromPool mist_tx; /* Derived from depth_tx during accumulation. 
*/ + TextureFromPool normal_tx; + TextureFromPool vector_tx; + TextureFromPool diffuse_color_tx; + TextureFromPool specular_color_tx; + TextureFromPool volume_light_tx; + TextureFromPool emission_tx; + TextureFromPool environment_tx; + TextureFromPool shadow_tx; + TextureFromPool ambient_occlusion_tx; + TextureFromPool cryptomatte_tx; + /* TODO(fclem): Use texture from pool once they support texture array. */ + Texture light_tx; + Texture aov_color_tx; + Texture aov_value_tx; + + private: + Instance &inst_; + + public: + RenderBuffers(Instance &inst) : inst_(inst){}; + + /* Acquires (also ensures) the render buffer before rendering to them. */ + void acquire(int2 extent); + void release(); +}; + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.cc b/source/blender/draw/engines/eevee_next/eevee_sampling.cc new file mode 100644 index 00000000000..76a0e98638b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_sampling.cc @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Random number generator, contains persistent state and sample count logic. + */ + +#include "BLI_rand.h" + +#include "eevee_instance.hh" +#include "eevee_sampling.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Sampling + * \{ */ + +void Sampling::init(const Scene *scene) +{ + sample_count_ = inst_.is_viewport() ? scene->eevee.taa_samples : scene->eevee.taa_render_samples; + + if (sample_count_ == 0) { + BLI_assert(inst_.is_viewport()); + sample_count_ = infinite_sample_count_; + } + + motion_blur_steps_ = !inst_.is_viewport() ? 
scene->eevee.motion_blur_steps : 1; + sample_count_ = divide_ceil_u(sample_count_, motion_blur_steps_); + + if (scene->eevee.flag & SCE_EEVEE_DOF_JITTER) { + if (sample_count_ == infinite_sample_count_) { + /* Special case for viewport continuous rendering. We clamp to a max sample + * to avoid the jittered dof never converging. */ + dof_ring_count_ = 6; + } + else { + dof_ring_count_ = sampling_web_ring_count_get(dof_web_density_, sample_count_); + } + dof_sample_count_ = sampling_web_sample_count_get(dof_web_density_, dof_ring_count_); + /* Change total sample count to fill the web pattern entirely. */ + sample_count_ = divide_ceil_u(sample_count_, dof_sample_count_) * dof_sample_count_; + } + else { + dof_ring_count_ = 0; + dof_sample_count_ = 1; + } + + /* Only multiply after to have full the full DoF web pattern for each time steps. */ + sample_count_ *= motion_blur_steps_; +} + +void Sampling::end_sync() +{ + if (reset_) { + viewport_sample_ = 0; + } + + if (inst_.is_viewport()) { + + interactive_mode_ = viewport_sample_ < interactive_mode_threshold; + + bool interactive_mode_disabled = (inst_.scene->eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0; + if (interactive_mode_disabled) { + interactive_mode_ = false; + sample_ = viewport_sample_; + } + else if (interactive_mode_) { + int interactive_sample_count = min_ii(interactive_sample_max_, sample_count_); + + if (viewport_sample_ < interactive_sample_count) { + /* Loop over the same starting samples. */ + sample_ = sample_ % interactive_sample_count; + } + else { + /* Break out of the loop and resume normal pattern. */ + sample_ = interactive_sample_count; + } + } + } +} + +void Sampling::step() +{ + { + /* TODO(fclem) we could use some persistent states to speedup the computation. */ + double2 r, offset = {0, 0}; + /* Using 2,3 primes as per UE4 Temporal AA presentation. 
+ * http://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx (slide 14) */ + uint2 primes = {2, 3}; + BLI_halton_2d(primes, offset, sample_ + 1, r); + /* WORKAROUND: We offset the distribution to make the first sample (0,0). This way, we are + * assured that at least one of the samples inside the TAA rotation will match the one from the + * draw manager. This makes sure overlays are correctly composited in static scene. */ + data_.dimensions[SAMPLING_FILTER_U] = fractf(r[0] + (1.0 / 2.0)); + data_.dimensions[SAMPLING_FILTER_V] = fractf(r[1] + (2.0 / 3.0)); + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_TIME] = r[0]; + data_.dimensions[SAMPLING_CLOSURE] = r[1]; + data_.dimensions[SAMPLING_RAYTRACE_X] = r[0]; + } + { + double2 r, offset = {0, 0}; + uint2 primes = {5, 7}; + BLI_halton_2d(primes, offset, sample_ + 1, r); + data_.dimensions[SAMPLING_LENS_U] = r[0]; + data_.dimensions[SAMPLING_LENS_V] = r[1]; + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_LIGHTPROBE] = r[0]; + data_.dimensions[SAMPLING_TRANSPARENCY] = r[1]; + } + { + /* Using leaped Halton sequence so we can reused the same primes as lens. */ + double3 r, offset = {0, 0, 0}; + uint64_t leap = 11; + uint3 primes = {5, 4, 7}; + BLI_halton_3d(primes, offset, sample_ * leap, r); + data_.dimensions[SAMPLING_SHADOW_U] = r[0]; + data_.dimensions[SAMPLING_SHADOW_V] = r[1]; + data_.dimensions[SAMPLING_SHADOW_W] = r[2]; + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_RAYTRACE_U] = r[0]; + data_.dimensions[SAMPLING_RAYTRACE_V] = r[1]; + data_.dimensions[SAMPLING_RAYTRACE_W] = r[2]; + } + { + /* Using leaped Halton sequence so we can reused the same primes. */ + double2 r, offset = {0, 0}; + uint64_t leap = 5; + uint2 primes = {2, 3}; + BLI_halton_2d(primes, offset, sample_ * leap, r); + data_.dimensions[SAMPLING_SHADOW_X] = r[0]; + data_.dimensions[SAMPLING_SHADOW_Y] = r[1]; + /* TODO de-correlate. 
*/ + data_.dimensions[SAMPLING_SSS_U] = r[0]; + data_.dimensions[SAMPLING_SSS_V] = r[1]; + } + + data_.push_update(); + + viewport_sample_++; + sample_++; + + reset_ = false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Sampling patterns + * \{ */ + +float3 Sampling::sample_ball(const float3 &rand) +{ + float3 sample; + sample.z = rand.x * 2.0f - 1.0f; /* cos theta */ + + float r = sqrtf(fmaxf(0.0f, 1.0f - square_f(sample.z))); /* sin theta */ + + float omega = rand.y * 2.0f * M_PI; + sample.x = r * cosf(omega); + sample.y = r * sinf(omega); + + sample *= sqrtf(sqrtf(rand.z)); + return sample; +} + +float2 Sampling::sample_disk(const float2 &rand) +{ + float omega = rand.y * 2.0f * M_PI; + return sqrtf(rand.x) * float2(cosf(omega), sinf(omega)); +} + +float2 Sampling::sample_spiral(const float2 &rand) +{ + /* Fibonacci spiral. */ + float omega = 4.0f * M_PI * (1.0f + sqrtf(5.0f)) * rand.x; + float r = sqrtf(rand.x); + /* Random rotation. */ + omega += rand.y * 2.0f * M_PI; + return r * float2(cosf(omega), sinf(omega)); +} + +void Sampling::dof_disk_sample_get(float *r_radius, float *r_theta) const +{ + if (dof_ring_count_ == 0) { + *r_radius = *r_theta = 0.0f; + return; + } + + int s = sample_ - 1; + int ring = 0; + int ring_sample_count = 1; + int ring_sample = 1; + + s = s * (dof_web_density_ - 1); + s = s % dof_sample_count_; + + /* Choosing sample to we get faster convergence. + * The issue here is that we cannot map a low discrepancy sequence to this sampling pattern + * because the same sample could be chosen twice in relatively short intervals. */ + /* For now just use an ascending sequence with an offset. This gives us relatively quick + * initial coverage and relatively high distance between samples. */ + /* TODO(@fclem) We can try to order samples based on a LDS into a table to avoid duplicates. + * The drawback would be some memory consumption and initialize time. 
*/ + int samples_passed = 1; + while (s >= samples_passed) { + ring++; + ring_sample_count = ring * dof_web_density_; + ring_sample = s - samples_passed; + ring_sample = (ring_sample + 1) % ring_sample_count; + samples_passed += ring_sample_count; + } + + *r_radius = ring / (float)dof_ring_count_; + *r_theta = 2.0f * M_PI * ring_sample / (float)ring_sample_count; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Cumulative Distribution Function (CDF) + * \{ */ + +/* Creates a discrete cumulative distribution function table from a given curvemapping. + * Output cdf vector is expected to already be sized according to the wanted resolution. */ +void Sampling::cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf) +{ + BLI_assert(cdf.size() > 1); + cdf[0] = 0.0f; + /* Actual CDF evaluation. */ + for (int u : IndexRange(cdf.size() - 1)) { + float x = (float)(u + 1) / (float)(cdf.size() - 1); + cdf[u + 1] = cdf[u] + BKE_curvemapping_evaluateF(&curve, 0, x); + } + /* Normalize the CDF. */ + for (int u : cdf.index_range()) { + cdf[u] /= cdf.last(); + } + /* Just to make sure. */ + cdf.last() = 1.0f; +} + +/* Inverts a cumulative distribution function. + * Output vector is expected to already be sized according to the wanted resolution. 
*/ +void Sampling::cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf) +{ + for (int u : inverted_cdf.index_range()) { + float x = (float)u / (float)(inverted_cdf.size() - 1); + for (int i : cdf.index_range()) { + if (i == cdf.size() - 1) { + inverted_cdf[u] = 1.0f; + } + else if (cdf[i] >= x) { + float t = (x - cdf[i]) / (cdf[i + 1] - cdf[i]); + inverted_cdf[u] = ((float)i + t) / (float)(cdf.size() - 1); + break; + } + } + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.hh b/source/blender/draw/engines/eevee_next/eevee_sampling.hh new file mode 100644 index 00000000000..c2bf23d20fc --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_sampling.hh @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Random number generator, contains persistent state and sample count logic. + */ + +#pragma once + +#include "BKE_colortools.h" +#include "BLI_system.h" +#include "BLI_vector.hh" +#include "DNA_scene_types.h" +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +class Sampling { + private: + Instance &inst_; + + /* Number of samples in the first ring of jittered depth of field. */ + static constexpr uint64_t dof_web_density_ = 6; + /* High number of sample for viewport infinite rendering. */ + static constexpr uint64_t infinite_sample_count_ = 0xFFFFFFu; + /* During interactive rendering, loop over the first few samples. */ + static constexpr uint64_t interactive_sample_max_ = 8; + + /** 0 based current sample. Might not increase sequentially in viewport. */ + uint64_t sample_ = 0; + /** Target sample count. */ + uint64_t sample_count_ = 64; + /** Number of ring in the web pattern of the jittered Depth of Field. */ + uint64_t dof_ring_count_ = 0; + /** Number of samples in the web pattern of the jittered Depth of Field. 
*/ + uint64_t dof_sample_count_ = 1; + /** Motion blur steps. */ + uint64_t motion_blur_steps_ = 1; + /** Increases if the view and the scene is static. Does increase sequentially. */ + int64_t viewport_sample_ = 0; + /** Tag to reset sampling for the next sample. */ + bool reset_ = false; + /** + * Switch between interactive and static accumulation. + * In interactive mode, image stability is prioritized over quality. + */ + bool interactive_mode_ = false; + /** + * Sample count after which we use the static accumulation. + * Interactive sampling from sample 0 to (interactive_mode_threshold - 1). + * Accumulation sampling from sample interactive_mode_threshold to sample_count_. + */ + static constexpr int interactive_mode_threshold = 3; + + SamplingDataBuf data_; + + public: + Sampling(Instance &inst) : inst_(inst){}; + ~Sampling(){}; + + void init(const Scene *scene); + void end_sync(); + void step(); + + /* Viewport Only: Function to call to notify something in the scene changed. + * This will reset accumulation. Do not call after end_sync() or during sample rendering. */ + void reset() + { + reset_ = true; + } + + /* Viewport Only: true if an update happened in the scene and accumulation needs reset. */ + bool is_reset() const + { + return reset_; + } + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_storage_block_ref(grp, "sampling_buf", &data_); + } + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(SAMPLING_BUF_SLOT, &data_); + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */ + float rng_get(eSamplingDimension dimension) const + { + return data_.dimensions[dimension]; + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. 
*/ + float2 rng_2d_get(eSamplingDimension starting_dimension) const + { + return *reinterpret_cast<const float2 *>(&data_.dimensions[starting_dimension]); + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */ + float3 rng_3d_get(eSamplingDimension starting_dimension) const + { + return *reinterpret_cast<const float3 *>(&data_.dimensions[starting_dimension]); + } + + /* Returns true if rendering has finished. */ + bool finished() const + { + return (sample_ >= sample_count_); + } + + /* Returns true if viewport smoothing and sampling has finished. */ + bool finished_viewport() const + { + return (viewport_sample_ >= sample_count_) && !interactive_mode_; + } + + /* Returns true if viewport renderer is in interactive mode and should use TAA. */ + bool interactive_mode() const + { + return interactive_mode_; + } + + uint64_t sample_count() const + { + return sample_count_; + } + + /* Return true if we are starting a new motion blur step. We need to run sync again since + * depsgraph was updated by MotionBlur::step(). */ + bool do_render_sync() const + { + return ((sample_ % (sample_count_ / motion_blur_steps_)) == 0); + } + + /** + * Special ball distribution: + * Point are distributed in a way that when they are orthogonally + * projected into any plane, the resulting distribution is (close to) + * a uniform disc distribution. + * \a rand is 3 random float in the [0..1] range. + * Returns point in a ball of radius 1 and centered on the origin. + */ + static float3 sample_ball(const float3 &rand); + + /** + * Uniform disc distribution. + * \a rand is 2 random float in the [0..1] range. + * Returns point in a disk of radius 1 and centered on the origin. + */ + static float2 sample_disk(const float2 &rand); + + /** + * Uniform disc distribution using Fibonacci spiral sampling. + * \a rand is 2 random float in the [0..1] range. + * Returns point in a disk of radius 1 and centered on the origin. 
+ */ + static float2 sample_spiral(const float2 &rand); + + /** + * Special RNG for depth of field. + * Returns \a radius and \a theta angle offset to apply to the web sampling pattern. + */ + void dof_disk_sample_get(float *r_radius, float *r_theta) const; + + /** + * Returns sample count inside the jittered depth of field web pattern. + */ + uint64_t dof_ring_count_get() const + { + return dof_ring_count_; + } + + /** + * Returns sample count inside the jittered depth of field web pattern. + */ + uint64_t dof_sample_count_get() const + { + return dof_sample_count_; + } + + /* Cumulative Distribution Function Utils. */ + static void cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf); + static void cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf); +}; + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 09aa97e49e9..64b1d4891a9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -9,6 +9,8 @@ * and static shader usage. 
*/ +#include "GPU_capabilities.h" + #include "gpu_shader_create_info.hh" #include "eevee_shader.hh" @@ -78,8 +80,68 @@ ShaderModule::~ShaderModule() const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type) { switch (shader_type) { - case VELOCITY_RESOLVE: - return "eevee_velocity_resolve"; + case FILM_FRAG: + return "eevee_film_frag"; + case FILM_COMP: + return "eevee_film_comp"; + case FILM_CRYPTOMATTE_POST: + return "eevee_film_cryptomatte_post"; + case HIZ_DEBUG: + return "eevee_hiz_debug"; + case HIZ_UPDATE: + return "eevee_hiz_update"; + case MOTION_BLUR_GATHER: + return "eevee_motion_blur_gather"; + case MOTION_BLUR_TILE_DILATE: + return "eevee_motion_blur_tiles_dilate"; + case MOTION_BLUR_TILE_FLATTEN_RENDER: + return "eevee_motion_blur_tiles_flatten_render"; + case MOTION_BLUR_TILE_FLATTEN_VIEWPORT: + return "eevee_motion_blur_tiles_flatten_viewport"; + case DOF_BOKEH_LUT: + return "eevee_depth_of_field_bokeh_lut"; + case DOF_DOWNSAMPLE: + return "eevee_depth_of_field_downsample"; + case DOF_FILTER: + return "eevee_depth_of_field_filter"; + case DOF_GATHER_FOREGROUND_LUT: + return "eevee_depth_of_field_gather_foreground_lut"; + case DOF_GATHER_FOREGROUND: + return "eevee_depth_of_field_gather_foreground_no_lut"; + case DOF_GATHER_BACKGROUND_LUT: + return "eevee_depth_of_field_gather_background_lut"; + case DOF_GATHER_BACKGROUND: + return "eevee_depth_of_field_gather_background_no_lut"; + case DOF_GATHER_HOLE_FILL: + return "eevee_depth_of_field_hole_fill"; + case DOF_REDUCE: + return "eevee_depth_of_field_reduce"; + case DOF_RESOLVE: + return "eevee_depth_of_field_resolve_no_lut"; + case DOF_RESOLVE_LUT: + return "eevee_depth_of_field_resolve_lut"; + case DOF_SETUP: + return "eevee_depth_of_field_setup"; + case DOF_SCATTER: + return "eevee_depth_of_field_scatter"; + case DOF_STABILIZE: + return "eevee_depth_of_field_stabilize"; + case DOF_TILES_DILATE_MINABS: + return "eevee_depth_of_field_tiles_dilate_minabs"; + case 
DOF_TILES_DILATE_MINMAX: + return "eevee_depth_of_field_tiles_dilate_minmax"; + case DOF_TILES_FLATTEN: + return "eevee_depth_of_field_tiles_flatten"; + case LIGHT_CULLING_DEBUG: + return "eevee_light_culling_debug"; + case LIGHT_CULLING_SELECT: + return "eevee_light_culling_select"; + case LIGHT_CULLING_SORT: + return "eevee_light_culling_sort"; + case LIGHT_CULLING_TILE: + return "eevee_light_culling_tile"; + case LIGHT_CULLING_ZBIN: + return "eevee_light_culling_zbin"; /* To avoid compiler warning about missing case. */ case MAX_SHADER_TYPE: return ""; @@ -122,11 +184,41 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu GPUCodegenOutput &codegen = *codegen_; ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info); - info.auto_resource_location(true); + /* WORKAROUND: Replace by new ob info. */ + int64_t ob_info_index = info.additional_infos_.first_index_of_try("draw_object_infos"); + if (ob_info_index != -1) { + info.additional_infos_[ob_info_index] = "draw_object_infos_new"; + } + + /* WORKAROUND: Add new ob attr buffer. */ + if (GPU_material_uniform_attributes(gpumat) != nullptr) { + info.additional_info("draw_object_attribute_new"); + } + + /* WORKAROUND: Avoid utility texture merge error. TODO: find a cleaner fix. */ + for (auto &resource : info.batch_resources_) { + if (resource.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) { + if (resource.slot == RBUFS_UTILITY_TEX_SLOT) { + resource.slot = GPU_max_textures_frag() - 1; + } + } + } if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { info.define("MAT_TRANSPARENT"); + /* Transparent material do not have any velocity specific pipeline. */ + if (pipeline_type == MAT_PIPE_FORWARD_PREPASS_VELOCITY) { + pipeline_type = MAT_PIPE_FORWARD_PREPASS; + } } + + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT) == false && + pipeline_type == MAT_PIPE_FORWARD) { + /* Opaque forward do support AOVs and render pass. 
*/ + info.additional_info("eevee_aov_out"); + info.additional_info("eevee_render_pass_out"); + } + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_BARYCENTRIC)) { switch (geometry_type) { case MAT_GEOM_MESH: @@ -161,7 +253,6 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu } } info.vertex_inputs_.clear(); - info.additional_info("draw_curves_infos"); break; case MAT_GEOM_WORLD: /** diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index 0f42e880a10..88538557c07 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -26,7 +26,41 @@ namespace blender::eevee { /* Keep alphabetical order and clean prefix. */ enum eShaderType { - VELOCITY_RESOLVE = 0, + FILM_FRAG = 0, + FILM_COMP, + FILM_CRYPTOMATTE_POST, + + DOF_BOKEH_LUT, + DOF_DOWNSAMPLE, + DOF_FILTER, + DOF_GATHER_BACKGROUND_LUT, + DOF_GATHER_BACKGROUND, + DOF_GATHER_FOREGROUND_LUT, + DOF_GATHER_FOREGROUND, + DOF_GATHER_HOLE_FILL, + DOF_REDUCE, + DOF_RESOLVE_LUT, + DOF_RESOLVE, + DOF_SCATTER, + DOF_SETUP, + DOF_STABILIZE, + DOF_TILES_DILATE_MINABS, + DOF_TILES_DILATE_MINMAX, + DOF_TILES_FLATTEN, + + HIZ_UPDATE, + HIZ_DEBUG, + + LIGHT_CULLING_DEBUG, + LIGHT_CULLING_SELECT, + LIGHT_CULLING_SORT, + LIGHT_CULLING_TILE, + LIGHT_CULLING_ZBIN, + + MOTION_BLUR_GATHER, + MOTION_BLUR_TILE_DILATE, + MOTION_BLUR_TILE_FLATTEN_RENDER, + MOTION_BLUR_TILE_FLATTEN_VIEWPORT, MAX_SHADER_TYPE, }; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh index eb409f076f3..8e96445d6b9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh @@ -12,22 +12,132 @@ # include "BLI_memory_utils.hh" # include "DRW_gpu_wrapper.hh" -// # include "eevee_defines.hh" +# include "draw_manager.hh" +# 
include "draw_pass.hh" + +# include "eevee_defines.hh" # include "GPU_shader_shared.h" namespace blender::eevee { -using draw::Framebuffer; -using draw::SwapChain; -using draw::Texture; -using draw::TextureFromPool; +using namespace draw; + +constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT; +constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; #endif #define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14 /* -------------------------------------------------------------------- */ +/** \name Debug Mode + * \{ */ + +/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30. */ +enum eDebugMode : uint32_t { + DEBUG_NONE = 0u, + /** + * Gradient showing light evaluation hot-spots. + */ + DEBUG_LIGHT_CULLING = 1u, + /** + * Show incorrectly downsample tiles in red. + */ + DEBUG_HIZ_VALIDATION = 2u, + /** + * Tile-maps to screen. Is also present in other modes. + * - Black pixels, no pages allocated. + * - Green pixels, pages cached. + * - Red pixels, pages allocated. + */ + DEBUG_SHADOW_TILEMAPS = 10u, + /** + * Random color per pages. Validates page density allocation and sampling. + */ + DEBUG_SHADOW_PAGES = 11u, + /** + * Outputs random color per tile-map (or tile-map level). Validates tile-maps coverage. + * Black means not covered by any tile-maps LOD of the shadow. + */ + DEBUG_SHADOW_LOD = 12u, + /** + * Outputs white pixels for pages allocated and black pixels for unused pages. + * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work. + */ + DEBUG_SHADOW_PAGE_ALLOCATION = 13u, + /** + * Outputs the tile-map atlas. Default tile-map is too big for the usual screen resolution. + * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. + */ + DEBUG_SHADOW_TILE_ALLOCATION = 14u, + /** + * Visualize linear depth stored in the atlas regions of the active light. + * This way, one can check if the rendering, the copying and the shadow sampling functions works. 
+ */ + DEBUG_SHADOW_SHADOW_DEPTH = 15u +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Sampling + * \{ */ + +enum eSamplingDimension : uint32_t { + SAMPLING_FILTER_U = 0u, + SAMPLING_FILTER_V = 1u, + SAMPLING_LENS_U = 2u, + SAMPLING_LENS_V = 3u, + SAMPLING_TIME = 4u, + SAMPLING_SHADOW_U = 5u, + SAMPLING_SHADOW_V = 6u, + SAMPLING_SHADOW_W = 7u, + SAMPLING_SHADOW_X = 8u, + SAMPLING_SHADOW_Y = 9u, + SAMPLING_CLOSURE = 10u, + SAMPLING_LIGHTPROBE = 11u, + SAMPLING_TRANSPARENCY = 12u, + SAMPLING_SSS_U = 13u, + SAMPLING_SSS_V = 14u, + SAMPLING_RAYTRACE_U = 15u, + SAMPLING_RAYTRACE_V = 16u, + SAMPLING_RAYTRACE_W = 17u, + SAMPLING_RAYTRACE_X = 18u +}; + +/** + * IMPORTANT: Make sure the array can contain all sampling dimensions. + * Also note that it needs to be multiple of 4. + */ +#define SAMPLING_DIMENSION_COUNT 20 + +/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */ +struct SamplingData { + /** Array containing random values from Low Discrepancy Sequence in [0..1) range. */ + float dimensions[SAMPLING_DIMENSION_COUNT]; +}; +BLI_STATIC_ASSERT_ALIGN(SamplingData, 16) + +/* Returns total sample count in a web pattern of the given size. */ +static inline int sampling_web_sample_count_get(int web_density, int ring_count) +{ + return ((ring_count * ring_count + ring_count) / 2) * web_density + 1; +} + +/* Returns lowest possible ring count that contains at least sample_count samples. */ +static inline int sampling_web_ring_count_get(int web_density, int sample_count) +{ + /* Inversion of web_sample_count_get(). */ + float x = 2.0f * (float(sample_count) - 1.0f) / float(web_density); + /* Solving polynomial. We only search positive solution. 
*/ + float discriminant = 1.0f + 4.0f * x; + return int(ceilf(0.5f * (sqrtf(discriminant) - 1.0f))); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Camera * \{ */ @@ -65,15 +175,176 @@ struct CameraData { /** Clipping distances. */ float clip_near; float clip_far; - /** Film pixel filter radius. */ - float filter_size; eCameraType type; + + bool1 initialized; + +#ifdef __cplusplus + /* Small constructor to allow detecting new buffers. */ + CameraData() : initialized(false){}; +#endif }; BLI_STATIC_ASSERT_ALIGN(CameraData, 16) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +#define FILM_PRECOMP_SAMPLE_MAX 16 + +enum eFilmWeightLayerIndex : uint32_t { + FILM_WEIGHT_LAYER_ACCUMULATION = 0u, + FILM_WEIGHT_LAYER_DISTANCE = 1u, +}; + +enum ePassStorageType : uint32_t { + PASS_STORAGE_COLOR = 0u, + PASS_STORAGE_VALUE = 1u, + PASS_STORAGE_CRYPTOMATTE = 2u, +}; + +struct FilmSample { + int2 texel; + float weight; + /** Used for accumulation. */ + float weight_sum_inv; +}; +BLI_STATIC_ASSERT_ALIGN(FilmSample, 16) + +struct FilmData { + /** Size of the film in pixels. */ + int2 extent; + /** Offset of the film in the full-res frame, in pixels. */ + int2 offset; + /** Extent used by the render buffers when rendering the main views. */ + int2 render_extent; + /** Sub-pixel offset applied to the window matrix. + * NOTE: In final film pixel unit. + * NOTE: Positive values makes the view translate in the negative axes direction. + * NOTE: The origin is the center of the lower left film pixel of the area covered by a render + * pixel if using scaled resolution rendering. + */ + float2 subpixel_offset; + /** Scaling factor to convert texel to uvs. */ + float2 extent_inv; + /** Is true if history is valid and can be sampled. Bypass history to resets accumulation. 
*/ + bool1 use_history; + /** Is true if combined buffer is valid and can be re-projected to reduce variance. */ + bool1 use_reprojection; + /** Is true if accumulation of non-filtered passes is needed. */ + bool1 has_data; + /** Is true if accumulation of filtered passes is needed. */ + bool1 any_render_pass_1; + bool1 any_render_pass_2; + /** Controlled by user in lookdev mode or by render settings. */ + float background_opacity; + float _pad0; + /** Output counts per type. */ + int color_len, value_len; + /** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */ + int mist_id; + int normal_id; + int vector_id; + int diffuse_light_id; + int diffuse_color_id; + int specular_light_id; + int specular_color_id; + int volume_light_id; + int emission_id; + int environment_id; + int shadow_id; + int ambient_occlusion_id; + /** Not indexed but still not -1 if enabled. */ + int depth_id; + int combined_id; + /** Id of the render-pass to be displayed. -1 for combined. */ + int display_id; + /** Storage type of the render-pass to be displayed. */ + ePassStorageType display_storage_type; + /** True if we bypass the accumulation and directly output the accumulation buffer. */ + bool1 display_only; + /** Start of AOVs and number of aov. */ + int aov_color_id, aov_color_len; + int aov_value_id, aov_value_len; + /** Start of cryptomatte per layer (-1 if pass is not enabled). */ + int cryptomatte_object_id; + int cryptomatte_asset_id; + int cryptomatte_material_id; + /** Max number of samples stored per layer (is even number). */ + int cryptomatte_samples_len; + /** Settings to render mist pass */ + float mist_scale, mist_bias, mist_exponent; + /** Scene exposure used for better noise reduction. */ + float exposure_scale; + /** Scaling factor for scaled resolution rendering. */ + int scaling_factor; + /** Film pixel filter radius. */ + float filter_radius; + /** Precomputed samples. First in the table is the closest one. The rest is unordered. 
*/ + int samples_len; + /** Sum of the weights of all samples in the sample table. */ + float samples_weight_total; + FilmSample samples[FILM_PRECOMP_SAMPLE_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(FilmData, 16) + +static inline float film_filter_weight(float filter_radius, float sample_distance_sqr) +{ +#if 1 /* Faster */ + /* Gaussian fitted to Blackman-Harris. */ + float r = sample_distance_sqr / (filter_radius * filter_radius); + const float sigma = 0.284; + const float fac = -0.5 / (sigma * sigma); + float weight = expf(fac * r); +#else + /* Blackman-Harris filter. */ + float r = M_2PI * saturate(0.5 + sqrtf(sample_distance_sqr) / (2.0 * filter_radius)); + float weight = 0.35875 - 0.48829 * cosf(r) + 0.14128 * cosf(2.0 * r) - 0.01168 * cosf(3.0 * r); +#endif + return weight; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Render passes + * \{ */ + +enum eRenderPassLayerIndex : uint32_t { + RENDER_PASS_LAYER_DIFFUSE_LIGHT = 0u, + RENDER_PASS_LAYER_SPECULAR_LIGHT = 1u, +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Arbitrary Output Variables + * \{ */ + +/* Theoretical max is 128 as we are using texture array and VRAM usage. + * However, the output_aov() function perform a linear search inside all the hashes. + * If we find a way to avoid this we could bump this number up. */ +#define AOV_MAX 16 + +/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */ +struct AOVsInfoData { + uint hash_value[AOV_MAX]; + uint hash_color[AOV_MAX]; + /* Length of used data. */ + uint color_len; + uint value_len; + /** Id of the AOV to be displayed (from the start of the AOV array). -1 for combined. */ + int display_id; + /** True if the AOV to be displayed is from the value accum buffer. 
*/ + bool1 display_is_value; +}; +BLI_STATIC_ASSERT_ALIGN(AOVsInfoData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name VelocityModule * \{ */ @@ -122,6 +393,272 @@ BLI_STATIC_ASSERT_ALIGN(VelocityGeometryIndex, 16) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Motion Blur + * \{ */ + +#define MOTION_BLUR_TILE_SIZE 32 +#define MOTION_BLUR_MAX_TILE 512 /* 16384 / MOTION_BLUR_TILE_SIZE */ +struct MotionBlurData { + /** As the name suggests. Used to avoid a division in the sampling. */ + float2 target_size_inv; + /** Viewport motion scaling factor. Make blur relative to frame time not render time. */ + float2 motion_scale; + /** Depth scaling factor. Avoid blurring background behind moving objects. */ + float depth_scale; + + float _pad0, _pad1, _pad2; +}; +BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16) + +/* For some reasons some GLSL compilers do not like this struct. + * So we declare it as a uint array instead and do indexing ourselves. */ +#ifdef __cplusplus +struct MotionBlurTileIndirection { + /** + * Stores indirection to the tile with the highest velocity covering each tile. + * This is stored using velocity in the MSB to be able to use atomicMax operations. + */ + uint prev[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE]; + uint next[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE]; +}; +BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16) +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +/* 5% error threshold. */ +#define DOF_FAST_GATHER_COC_ERROR 0.05 +#define DOF_GATHER_RING_COUNT 5 +#define DOF_DILATE_RING_COUNT 3 + +struct DepthOfFieldData { + /** Size of the render targets for gather & scatter passes. */ + int2 extent; + /** Size of a pixel in uv space (1.0 / extent). */ + float2 texel_size; + /** Scale factor for anisotropic bokeh. 
*/ + float2 bokeh_anisotropic_scale; + float2 bokeh_anisotropic_scale_inv; + /* Correction factor to align main target pixels with the filtered mipmap chain texture. */ + float2 gather_uv_fac; + /** Scatter parameters. */ + float scatter_coc_threshold; + float scatter_color_threshold; + float scatter_neighbor_max_color; + int scatter_sprite_per_row; + /** Number of side the bokeh shape has. */ + float bokeh_blades; + /** Rotation of the bokeh shape. */ + float bokeh_rotation; + /** Multiplier and bias to apply to linear depth to Circle of confusion (CoC). */ + float coc_mul, coc_bias; + /** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */ + float coc_abs_max; + /** Copy of camera type. */ + eCameraType camera_type; + /** Weights of spatial filtering in stabilize pass. Not array to avoid alignment restriction. */ + float4 filter_samples_weight; + float filter_center_weight; + /** Max number of sprite in the scatter pass for each ground. */ + int scatter_max_rect; + + int _pad0, _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16) + +struct ScatterRect { + /** Color and CoC of the 4 pixels the scatter sprite represents. */ + float4 color_and_coc[4]; + /** Rect center position in half pixel space. */ + float2 offset; + /** Rect half extent in half pixel space. */ + float2 half_extent; +}; +BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16) + +/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */ +#ifndef M_PI +# define EEVEE_PI +# define M_PI 3.14159265358979323846 /* pi */ +#endif + +static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth) +{ + depth = (dof.camera_type != CAMERA_ORTHO) ? 1.0f / depth : depth; + return dof.coc_mul * depth + dof.coc_bias; +} + +static inline float regular_polygon_side_length(float sides_count) +{ + return 2.0f * sinf(M_PI / sides_count); +} + +/* Returns intersection ratio between the radius edge at theta and the regular polygon edge. 
+ * Start first corners at theta == 0. */ +static inline float circle_to_polygon_radius(float sides_count, float theta) +{ + /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide + * 36). */ + float side_angle = (2.0f * M_PI) / sides_count; + return cosf(side_angle * 0.5f) / + cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI))); +} + +/* Remap input angle to have homogenous spacing of points along a polygon edge. + * Expects theta to be in [0..2pi] range. */ +static inline float circle_to_polygon_angle(float sides_count, float theta) +{ + float side_angle = (2.0f * M_PI) / sides_count; + float halfside_angle = side_angle * 0.5f; + float side = floorf(theta / side_angle); + /* Length of segment from center to the middle of polygon side. */ + float adjacent = circle_to_polygon_radius(sides_count, 0.0f); + + /* This is the relative position of the sample on the polygon half side. */ + float local_theta = theta - side * side_angle; + float ratio = (local_theta - halfside_angle) / halfside_angle; + + float halfside_len = regular_polygon_side_length(sides_count) * 0.5f; + float opposite = ratio * halfside_len; + + /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */ + float final_local_theta = atanf(opposite / adjacent); + + return side * side_angle + final_local_theta; +} + +#ifdef EEVEE_PI +# undef M_PI +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Light Culling + * \{ */ + +/* Number of items we can cull. Limited by how we store CullingZBin. */ +#define CULLING_MAX_ITEM 65536 +/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */ +#define CULLING_ZBIN_COUNT 4096 +/* Max tile map resolution per axes. */ +#define CULLING_TILE_RES 16 + +struct LightCullingData { + /** Scale applied to tile pixel coordinates to get target UV coordinate. 
*/ + float2 tile_to_uv_fac; + /** Scale and bias applied to linear Z to get zbin. */ + float zbin_scale; + float zbin_bias; + /** Valid item count in the source data array. */ + uint items_count; + /** Items that are processed by the 2.5D culling. */ + uint local_lights_len; + /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). */ + uint sun_lights_len; + /** Number of items that passes the first culling test. */ + uint visible_count; + /** Extent of one square tile in pixels. */ + float tile_size; + /** Number of tiles on the X/Y axis. */ + uint tile_x_len; + uint tile_y_len; + /** Number of word per tile. Depends on the maximum number of lights. */ + uint tile_word_len; +}; +BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Lights + * \{ */ + +#define LIGHT_NO_SHADOW -1 + +enum eLightType : uint32_t { + LIGHT_SUN = 0u, + LIGHT_POINT = 1u, + LIGHT_SPOT = 2u, + LIGHT_RECT = 3u, + LIGHT_ELLIPSE = 4u +}; + +static inline bool is_area_light(eLightType type) +{ + return type >= LIGHT_RECT; +} + +struct LightData { + /** Normalized object matrix. Last column contains data accessible using the following macros. */ + float4x4 object_mat; + /** Packed data in the last column of the object_mat. */ +#define _area_size_x object_mat[0][3] +#define _area_size_y object_mat[1][3] +#define _radius _area_size_x +#define _spot_mul object_mat[2][3] +#define _spot_bias object_mat[3][3] + /** Aliases for axes. */ +#ifndef USE_GPU_SHADER_CREATE_INFO +# define _right object_mat[0] +# define _up object_mat[1] +# define _back object_mat[2] +# define _position object_mat[3] +#else +# define _right object_mat[0].xyz +# define _up object_mat[1].xyz +# define _back object_mat[2].xyz +# define _position object_mat[3].xyz +#endif + /** Influence radius (inverted and squared) adjusted for Surface / Volume power. 
*/ + float influence_radius_invsqr_surface; + float influence_radius_invsqr_volume; + /** Maximum influence radius. Used for culling. */ + float influence_radius_max; + /** Index of the shadow struct on CPU. -1 means no shadow. */ + int shadow_id; + /** NOTE: It is ok to use float3 here. A float is declared right after it. + * float3 is also aligned to 16 bytes. */ + float3 color; + /** Power depending on shader type. */ + float diffuse_power; + float specular_power; + float volume_power; + float transmit_power; + /** Special radius factor for point lighting. */ + float radius_squared; + /** Light Type. */ + eLightType type; + /** Spot angle tangent. */ + float spot_tan; + /** Spot size. Aligned to size of float2. */ + float2 spot_size_inv; + /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */ + // ShadowData shadow_data; +}; +BLI_STATIC_ASSERT_ALIGN(LightData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z Buffer + * \{ */ + +struct HiZData { + /** Scale factor to remove HiZBuffer padding. */ + float2 uv_scale; + + float2 _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(HiZData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Ray-Tracing * \{ */ @@ -142,6 +679,34 @@ enum eClosureBits : uint32_t { /** \} */ /* -------------------------------------------------------------------- */ +/** \name Subsurface + * \{ */ + +#define SSS_SAMPLE_MAX 64 +#define SSS_BURLEY_TRUNCATE 16.0 +#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328 +#define SSS_TRANSMIT_LUT_SIZE 64.0 +#define SSS_TRANSMIT_LUT_RADIUS 1.218 +#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_STEP_RES 64.0 + +struct SubsurfaceData { + /** xy: 2D sample position [-1..1], zw: sample_bounds. 
*/ + /* NOTE(fclem) Using float4 for alignment. */ + float4 samples[SSS_SAMPLE_MAX]; + /** Sample index after which samples are not randomly rotated anymore. */ + int jitter_threshold; + /** Number of samples precomputed in the set. */ + int sample_len; + int _pad0; + int _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Utility Texture * \{ */ @@ -178,10 +743,26 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer) #ifdef __cplusplus +using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>; using CameraDataBuf = draw::UniformBuffer<CameraData>; +using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>; +using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>; +using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>; +using FilmDataBuf = draw::UniformBuffer<FilmData>; +using HiZDataBuf = draw::UniformBuffer<HiZData>; +using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>; +using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>; +using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>; +using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>; +using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>; +using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>; +using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>; +using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>; +using SamplingDataBuf = draw::StorageBuffer<SamplingData>; +using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>; using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>; using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>; -using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>; +using CryptomatteObjectBuf = 
draw::StorageArrayBuffer<float2, 16>; } // namespace blender::eevee #endif diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.cc b/source/blender/draw/engines/eevee_next/eevee_sync.cc index 42af251d770..09ea7c9ec3d 100644 --- a/source/blender/draw/engines/eevee_next/eevee_sync.cc +++ b/source/blender/draw/engines/eevee_next/eevee_sync.cc @@ -47,7 +47,7 @@ ObjectHandle &SyncModule::sync_object(Object *ob) const int recalc_flags = ID_RECALC_COPY_ON_WRITE | ID_RECALC_TRANSFORM | ID_RECALC_SHADING | ID_RECALC_GEOMETRY; if ((eevee_dd.recalc & recalc_flags) != 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); UNUSED_VARS(inst_); } @@ -63,7 +63,7 @@ WorldHandle &SyncModule::sync_world(::World *world) const int recalc_flags = ID_RECALC_ALL; if ((eevee_dd.recalc & recalc_flags) != 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } return eevee_dd; } @@ -74,25 +74,12 @@ WorldHandle &SyncModule::sync_world(::World *world) /** \name Common * \{ */ -static inline void shgroup_geometry_call(DRWShadingGroup *grp, - Object *ob, - GPUBatch *geom, - int v_first = -1, - int v_count = -1, - bool use_instancing = false) +static inline void geometry_call(PassMain::Sub *sub_pass, + GPUBatch *geom, + ResourceHandle resource_handle) { - if (grp == nullptr) { - return; - } - - if (v_first == -1) { - DRW_shgroup_call(grp, geom, ob); - } - else if (use_instancing) { - DRW_shgroup_call_instance_range(grp, ob, geom, v_first, v_count); - } - else { - DRW_shgroup_call_range(grp, ob, geom, v_first, v_count); + if (sub_pass != nullptr) { + sub_pass->draw(geom, resource_handle); } } @@ -102,9 +89,13 @@ static inline void shgroup_geometry_call(DRWShadingGroup *grp, /** \name Mesh * \{ */ -void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle) +void SyncModule::sync_mesh(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + const ObjectRef &ob_ref) { - bool has_motion = inst_.velocity.step_object_sync(ob, ob_handle.object_key, 
ob_handle.recalc); + bool has_motion = inst_.velocity.step_object_sync( + ob, ob_handle.object_key, res_handle, ob_handle.recalc); MaterialArray &material_array = inst_.materials.material_array_get(ob, has_motion); @@ -123,14 +114,20 @@ void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle) continue; } Material *material = material_array.materials[i]; - shgroup_geometry_call(material->shading.shgrp, ob, geom); - shgroup_geometry_call(material->prepass.shgrp, ob, geom); - shgroup_geometry_call(material->shadow.shgrp, ob, geom); + geometry_call(material->shading.sub_pass, geom, res_handle); + geometry_call(material->prepass.sub_pass, geom, res_handle); + geometry_call(material->shadow.sub_pass, geom, res_handle); - is_shadow_caster = is_shadow_caster || material->shadow.shgrp != nullptr; + is_shadow_caster = is_shadow_caster || material->shadow.sub_pass != nullptr; is_alpha_blend = is_alpha_blend || material->is_alpha_blend_transparent; + + GPUMaterial *gpu_material = material_array.gpu_materials[i]; + ::Material *mat = GPU_material_get_material(gpu_material); + inst_.cryptomatte.sync_material(mat); } + inst_.manager->extract_object_attributes(res_handle, ob_ref, material_array.gpu_materials); + inst_.cryptomatte.sync_object(ob, res_handle); // shadows.sync_object(ob, ob_handle, is_shadow_caster, is_alpha_blend); } @@ -155,11 +152,13 @@ struct gpIterData { int vcount = 0; bool instancing = false; - gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle) + gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle, ResourceHandle resource_handle) : inst(inst_), ob(ob_), material_array(inst_.materials.material_array_get( - ob_, inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc))) + ob_, + inst_.velocity.step_object_sync( + ob, ob_handle.object_key, resource_handle, ob_handle.recalc))) { cfra = DEG_get_ctime(inst.depsgraph); }; @@ -167,26 +166,28 @@ struct gpIterData { static void gpencil_drawcall_flush(gpIterData &iter) 
{ +#if 0 /* Incompatible with new draw manager. */ if (iter.geom != nullptr) { - shgroup_geometry_call(iter.material->shading.shgrp, + geometry_call(iter.material->shading.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); - shgroup_geometry_call(iter.material->prepass.shgrp, + geometry_call(iter.material->prepass.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); - shgroup_geometry_call(iter.material->shadow.shgrp, + geometry_call(iter.material->shadow.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); } +#endif iter.geom = nullptr; iter.vfirst = -1; iter.vcount = 0; @@ -250,18 +251,22 @@ static void gpencil_stroke_sync(bGPDlayer *UNUSED(gpl), } } -void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle) +void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle) { /* TODO(fclem): Waiting for a user option to use the render engine instead of gpencil engine. */ - return; + if (true) { + inst_.gpencil_engine_enabled = true; + return; + } + UNUSED_VARS(res_handle); - gpIterData iter(inst_, ob, ob_handle); + gpIterData iter(inst_, ob, ob_handle, res_handle); BKE_gpencil_visible_stroke_iter((bGPdata *)ob->data, nullptr, gpencil_stroke_sync, &iter); gpencil_drawcall_flush(iter); - // bool is_caster = true; /* TODO material.shadow.shgrp. */ + // bool is_caster = true; /* TODO material.shadow.sub_pass. */ // bool is_alpha_blend = true; /* TODO material.is_alpha_blend. 
*/ // shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend); } @@ -277,14 +282,24 @@ static void shgroup_curves_call(MaterialPass &matpass, ParticleSystem *part_sys = nullptr, ModifierData *modifier_data = nullptr) { - if (matpass.shgrp == nullptr) { + UNUSED_VARS(ob, modifier_data); + if (matpass.sub_pass == nullptr) { return; } - DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.shgrp, matpass.gpumat); + if (part_sys != nullptr) { + // DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.sub_pass, matpass.gpumat); + } + else { + // DRW_shgroup_curves_create_sub(ob, matpass.sub_pass, matpass.gpumat); + } } -void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data) +void SyncModule::sync_curves(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + ModifierData *modifier_data) { + UNUSED_VARS(res_handle); int mat_nr = CURVES_MATERIAL_NR; ParticleSystem *part_sys = nullptr; @@ -309,10 +324,16 @@ void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData * shgroup_curves_call(material.prepass, ob, part_sys, modifier_data); shgroup_curves_call(material.shadow, ob, part_sys, modifier_data); + inst_.cryptomatte.sync_object(ob, res_handle); + GPUMaterial *gpu_material = + inst_.materials.material_array_get(ob, has_motion).gpu_materials[mat_nr - 1]; + ::Material *mat = GPU_material_get_material(gpu_material); + inst_.cryptomatte.sync_material(mat); + /* TODO(fclem) Hair velocity. 
*/ // shading_passes.velocity.gpencil_add(ob, ob_handle); - // bool is_caster = material.shadow.shgrp != nullptr; + // bool is_caster = material.shadow.sub_pass != nullptr; // bool is_alpha_blend = material.is_alpha_blend_transparent; // shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend); } diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.hh b/source/blender/draw/engines/eevee_next/eevee_sync.hh index bd8147a2882..ab883ce44c2 100644 --- a/source/blender/draw/engines/eevee_next/eevee_sync.hh +++ b/source/blender/draw/engines/eevee_next/eevee_sync.hh @@ -150,9 +150,15 @@ class SyncModule { ObjectHandle &sync_object(Object *ob); WorldHandle &sync_world(::World *world); - void sync_mesh(Object *ob, ObjectHandle &ob_handle); - void sync_gpencil(Object *ob, ObjectHandle &ob_handle); - void sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data = nullptr); + void sync_mesh(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + const ObjectRef &ob_ref); + void sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle); + void sync_curves(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + ModifierData *modifier_data = nullptr); }; /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc index ceae9df44d0..7af311a8ccc 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc @@ -9,10 +9,6 @@ * temporal re-projection or motion blur. * * It is the module that tracks the objects between frames updates. - * - * #VelocityModule contains all motion steps data and logic. - * #VelocityPass contains the resolve pass for static geometry. - * #VelocityView is a per view instance that contain the velocity buffer. 
*/ #include "BKE_duplilist.h" @@ -36,16 +32,21 @@ namespace blender::eevee { void VelocityModule::init() { -#if 0 /* TODO renderpasses */ - if (inst_.render && (inst_.render_passes.vector != nullptr)) { - /* No motion blur and the vector pass was requested. Do the step sync here. */ + if (inst_.render && (inst_.film.enabled_passes_get() & EEVEE_RENDER_PASS_VECTOR) != 0) { + /* No motion blur and the vector pass was requested. Do the steps sync here. */ const Scene *scene = inst_.scene; float initial_time = scene->r.cfra + scene->r.subframe; step_sync(STEP_PREVIOUS, initial_time - 1.0f); step_sync(STEP_NEXT, initial_time + 1.0f); + inst_.set_time(initial_time); + step_ = STEP_CURRENT; + /* Let the main sync loop handle the current step. */ } -#endif + + /* For viewport, only previous motion is supported. + * Still bind previous step to avoid undefined behavior. */ + next_step_ = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT; } static void step_object_sync_render(void *velocity, @@ -54,7 +55,9 @@ static void step_object_sync_render(void *velocity, Depsgraph *UNUSED(depsgraph)) { ObjectKey object_key(ob); - reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key); + /* NOTE: Dummy resource handle since this will not be used for drawing. */ + ResourceHandle resource_handle(0); + reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key, resource_handle); } void VelocityModule::step_sync(eVelocityStep step, float time) @@ -70,10 +73,18 @@ void VelocityModule::step_camera_sync() { inst_.camera.sync(); *camera_steps[step_] = inst_.camera.data_get(); + step_time[step_] = inst_.scene->r.cfra + inst_.scene->r.subframe; + /* Fix undefined camera steps when rendering is starting. 
*/ + if ((step_ == STEP_CURRENT) && (camera_steps[STEP_PREVIOUS]->initialized == false)) { + *camera_steps[STEP_PREVIOUS] = *static_cast<CameraData *>(camera_steps[step_]); + camera_steps[STEP_PREVIOUS]->initialized = true; + step_time[STEP_PREVIOUS] = step_time[step_]; + } } bool VelocityModule::step_object_sync(Object *ob, ObjectKey &object_key, + ResourceHandle resource_handle, int /*IDRecalcFlag*/ recalc) { bool has_motion = object_has_velocity(ob) || (recalc & ID_RECALC_TRANSFORM); @@ -85,8 +96,6 @@ bool VelocityModule::step_object_sync(Object *ob, return false; } - uint32_t resource_id = DRW_object_resource_id_get(ob); - /* Object motion. */ /* FIXME(fclem) As we are using original objects pointers, there is a chance the previous * object key matches a totally different object if the scene was changed by user or python @@ -95,7 +104,7 @@ bool VelocityModule::step_object_sync(Object *ob, * We live with that until we have a correct way of identifying new objects. */ VelocityObjectData &vel = velocity_map.lookup_or_add_default(object_key); vel.obj.ofs[step_] = object_steps_usage[step_]++; - vel.obj.resource_id = resource_id; + vel.obj.resource_id = resource_handle.resource_index(); vel.id = (ID *)ob->data; object_steps[step_]->get_or_resize(vel.obj.ofs[step_]) = ob->obmat; if (step_ == STEP_CURRENT) { @@ -162,7 +171,7 @@ bool VelocityModule::step_object_sync(Object *ob, } /* TODO(@fclem): Reset sampling here? Should ultimately be covered by depsgraph update tags. 
*/ - // inst_.sampling.reset(); + inst_.sampling.reset(); return true; } @@ -213,6 +222,7 @@ void VelocityModule::step_swap() SWAP(VelocityObjectBuf *, object_steps[step_a], object_steps[step_b]); SWAP(VelocityGeometryBuf *, geometry_steps[step_a], geometry_steps[step_b]); SWAP(CameraDataBuf *, camera_steps[step_a], camera_steps[step_b]); + SWAP(float, step_time[step_a], step_time[step_b]); for (VelocityObjectData &vel : velocity_map.values()) { vel.obj.ofs[step_a] = vel.obj.ofs[step_b]; @@ -239,10 +249,7 @@ void VelocityModule::step_swap() void VelocityModule::begin_sync() { - if (inst_.is_viewport()) { - /* Viewport always evaluate current step. */ - step_ = STEP_CURRENT; - } + step_ = STEP_CURRENT; step_camera_sync(); object_steps_usage[step_] = 0; } @@ -255,7 +262,7 @@ void VelocityModule::end_sync() uint32_t max_resource_id_ = 0u; for (Map<ObjectKey, VelocityObjectData>::Item item : velocity_map.items()) { - if (item.value.obj.resource_id == (uint)-1) { + if (item.value.obj.resource_id == (uint32_t)-1) { deleted_obj.append(item.key); } else { @@ -264,14 +271,18 @@ void VelocityModule::end_sync() } if (deleted_obj.size() > 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); + } + + if (inst_.is_viewport() && camera_has_motion()) { + inst_.sampling.reset(); } - for (auto key : deleted_obj) { + for (auto &key : deleted_obj) { velocity_map.remove(key); } - indirection_buf.resize(power_of_2_max_u(max_resource_id_ + 1)); + indirection_buf.resize(ceil_to_multiple_u(max_resource_id_, 128)); /* Avoid uploading more data to the GPU as well as an extra level of * indirection on the GPU by copying back offsets the to VelocityIndex. 
*/ @@ -300,19 +311,6 @@ void VelocityModule::end_sync() camera_steps[STEP_CURRENT]->push_update(); camera_steps[STEP_NEXT]->push_update(); indirection_buf.push_update(); - - { - resolve_ps_ = DRW_pass_create("Velocity.Resolve", (DRWState)0); - GPUShader *sh = inst_.shaders.static_shader_get(VELOCITY_RESOLVE); - DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_); - DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_); - DRW_shgroup_uniform_image_ref(grp, "velocity_view_img", &velocity_view_tx_); - DRW_shgroup_uniform_image_ref(grp, "velocity_camera_img", &velocity_camera_tx_); - DRW_shgroup_uniform_block(grp, "camera_prev", *camera_steps[STEP_PREVIOUS]); - DRW_shgroup_uniform_block(grp, "camera_curr", *camera_steps[STEP_CURRENT]); - DRW_shgroup_uniform_block(grp, "camera_next", *camera_steps[STEP_NEXT]); - DRW_shgroup_call_compute_ref(grp, resolve_dispatch_size_); - } } bool VelocityModule::object_has_velocity(const Object *ob) @@ -359,60 +357,30 @@ void VelocityModule::bind_resources(DRWShadingGroup *grp) DRW_shgroup_storage_block_ref(grp, "velocity_indirection_buf", &indirection_buf); } -/* Resolve pass for static geometry and to camera space projection. */ -void VelocityModule::resolve_camera_motion(GPUTexture *depth_tx, - GPUTexture *velocity_view_tx, - GPUTexture *velocity_camera_tx) +bool VelocityModule::camera_has_motion() const { - input_depth_tx_ = depth_tx; - velocity_view_tx_ = velocity_view_tx; - velocity_camera_tx_ = velocity_camera_tx; - - resolve_dispatch_size_.x = divide_ceil_u(GPU_texture_width(depth_tx), 8); - resolve_dispatch_size_.y = divide_ceil_u(GPU_texture_height(depth_tx), 8); - - DRW_draw_pass(resolve_ps_); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Velocity View - * \{ */ - -void VelocityView::sync() -{ - /* TODO: Remove. 
*/ - velocity_view_tx_.sync(); - velocity_camera_tx_.sync(); -} - -void VelocityView::acquire(int2 extent) -{ - /* WORKAROUND: View name should be unique and static. - * With this, we can reuse the same texture across views. */ - DrawEngineType *owner = (DrawEngineType *)view_name_.c_str(); - - /* Only RG16F when only doing only reprojection or motion blur. */ - eGPUTextureFormat format = inst_.is_viewport() ? GPU_RG16F : GPU_RGBA16F; - velocity_view_tx_.acquire(extent, format, owner); - if (false /* TODO(fclem): Panoramic camera. */) { - velocity_camera_tx_.acquire(extent, format, owner); - } - else { - velocity_camera_tx_.acquire(int2(1), format, owner); + /* Only valid after sync. */ + if (inst_.is_viewport()) { + /* Viewport has no next step. */ + return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT]; } + return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT] && + *camera_steps[STEP_NEXT] != *camera_steps[STEP_CURRENT]; } -void VelocityView::resolve(GPUTexture *depth_tx) +bool VelocityModule::camera_changed_projection() const { - inst_.velocity.resolve_camera_motion(depth_tx, velocity_view_tx_, velocity_camera_tx_); + /* Only valid after sync. */ + if (inst_.is_viewport()) { + return camera_steps[STEP_PREVIOUS]->type != camera_steps[STEP_CURRENT]->type; + } + /* Cannot happen in render mode since we set the type during the init phase. 
*/ + return false; } -void VelocityView::release() +float VelocityModule::step_time_delta_get(eVelocityStep start, eVelocityStep end) const { - velocity_view_tx_.release(); - velocity_camera_tx_.release(); + return step_time[end] - step_time[start]; } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.hh b/source/blender/draw/engines/eevee_next/eevee_velocity.hh index e2606c061e1..6f18b05d476 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.hh +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.hh @@ -27,8 +27,6 @@ namespace blender::eevee { /** Container for scene velocity data. */ class VelocityModule { - friend class VelocityView; - public: struct VelocityObjectData : public VelocityIndex { /** ID to retrieve the corresponding #VelocityGeometryData after copy. */ @@ -58,6 +56,8 @@ class VelocityModule { int3 object_steps_usage = int3(0); /** Buffer of all #VelocityIndex used in this frame. Indexed by draw manager resource id. */ VelocityIndexBuf indirection_buf; + /** Frame time at which each steps were evaluated. */ + float3 step_time; /** * Copies of camera data. One for previous and one for next time step. @@ -67,16 +67,10 @@ class VelocityModule { private: Instance &inst_; + /** Step being synced. */ eVelocityStep step_ = STEP_CURRENT; - - DRWPass *resolve_ps_ = nullptr; - - /** Reference only. Not owned. */ - GPUTexture *input_depth_tx_; - GPUTexture *velocity_view_tx_; - GPUTexture *velocity_camera_tx_; - - int3 resolve_dispatch_size_ = int3(1, 1, 1); + /** Step referenced as next step. */ + eVelocityStep next_step_ = STEP_NEXT; public: VelocityModule(Instance &inst) : inst_(inst) @@ -111,7 +105,10 @@ class VelocityModule { void step_sync(eVelocityStep step, float time); /* Gather motion data. Returns true if the object **can** have motion. 
*/ - bool step_object_sync(Object *ob, ObjectKey &object_key, int recalc = 0); + bool step_object_sync(Object *ob, + ObjectKey &object_key, + ResourceHandle resource_handle, + int recalc = 0); /* Moves next frame data to previous frame data. Nullify next frame data. */ void step_swap(); @@ -121,56 +118,29 @@ class VelocityModule { void bind_resources(DRWShadingGroup *grp); - private: - bool object_has_velocity(const Object *ob); - bool object_is_deform(const Object *ob); - - void resolve_camera_motion(GPUTexture *depth_tx, - GPUTexture *velocity_view_tx, - GPUTexture *velocity_camera_tx); -}; + template<typename T> void bind_resources(draw::detail::Pass<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(VELOCITY_OBJ_PREV_BUF_SLOT, &(*object_steps[STEP_PREVIOUS])); + pass->bind_ssbo(VELOCITY_OBJ_NEXT_BUF_SLOT, &(*object_steps[next_step_])); + pass->bind_ssbo(VELOCITY_GEO_PREV_BUF_SLOT, &(*geometry_steps[STEP_PREVIOUS])); + pass->bind_ssbo(VELOCITY_GEO_NEXT_BUF_SLOT, &(*geometry_steps[next_step_])); + pass->bind_ssbo(VELOCITY_INDIRECTION_BUF_SLOT, &indirection_buf); + /* Uniform Buf. */ + pass->bind_ubo(VELOCITY_CAMERA_PREV_BUF, &(*camera_steps[STEP_PREVIOUS])); + pass->bind_ubo(VELOCITY_CAMERA_CURR_BUF, &(*camera_steps[STEP_CURRENT])); + pass->bind_ubo(VELOCITY_CAMERA_NEXT_BUF, &(*camera_steps[next_step_])); + } -/** \} */ + bool camera_has_motion() const; + bool camera_changed_projection() const; -/* -------------------------------------------------------------------- */ -/** \name Velocity - * - * \{ */ + /* Returns frame time difference between two steps. */ + float step_time_delta_get(eVelocityStep start, eVelocityStep end) const; -/** - * Per view module. 
- */ -class VelocityView { private: - Instance &inst_; - - StringRefNull view_name_; - - TextureFromPool velocity_camera_tx_ = {"velocity_camera_tx_"}; - TextureFromPool velocity_view_tx_ = {"velocity_view_tx_"}; - - public: - VelocityView(Instance &inst, const char *name) : inst_(inst), view_name_(name){}; - ~VelocityView(){}; - - void sync(); - - void acquire(int2 extent); - void release(); - - void resolve(GPUTexture *depth_tx); - - /** - * Getters - **/ - GPUTexture *view_vectors_get() const - { - return velocity_view_tx_; - } - GPUTexture *camera_vectors_get() const - { - return (velocity_camera_tx_.is_valid()) ? velocity_camera_tx_ : velocity_view_tx_; - } + bool object_has_velocity(const Object *ob); + bool object_is_deform(const Object *ob); }; /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc index e21342c5ef6..48951c2bae7 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.cc +++ b/source/blender/draw/engines/eevee_next/eevee_view.cc @@ -34,17 +34,19 @@ void ShadingView::init() // mb_.init(); } -void ShadingView::sync(int2 render_extent_) +void ShadingView::sync() { + int2 render_extent = inst_.film.render_extent_get(); + if (false /* inst_.camera.is_panoramic() */) { - int64_t render_pixel_count = render_extent_.x * (int64_t)render_extent_.y; + int64_t render_pixel_count = render_extent.x * (int64_t)render_extent.y; /* Divide pixel count between the 6 views. Rendering to a square target. */ extent_[0] = extent_[1] = ceilf(sqrtf(1 + (render_pixel_count / 6))); /* TODO(@fclem): Clip unused views here. */ is_enabled_ = true; } else { - extent_ = render_extent_; + extent_ = render_extent; /* Only enable -Z view. */ is_enabled_ = (StringRefNull(name_) == "negZ_view"); } @@ -54,47 +56,34 @@ void ShadingView::sync(int2 render_extent_) } /* Create views. 
*/ - // const CameraData &data = inst_.camera.data_get(); + const CameraData &cam = inst_.camera.data_get(); float4x4 viewmat, winmat; const float(*viewmat_p)[4] = viewmat.ptr(), (*winmat_p)[4] = winmat.ptr(); -#if 0 if (false /* inst_.camera.is_panoramic() */) { /* TODO(@fclem) Over-scans. */ /* For now a mandatory 5% over-scan for DoF. */ - float side = data.clip_near * 1.05f; - float near = data.clip_near; - float far = data.clip_far; + float side = cam.clip_near * 1.05f; + float near = cam.clip_near; + float far = cam.clip_far; perspective_m4(winmat.ptr(), -side, side, -side, side, near, far); - viewmat = face_matrix_ * data.viewmat; + viewmat = face_matrix_ * cam.viewmat; } else { - viewmat_p = data.viewmat.ptr(); - winmat_p = data.winmat.ptr(); + viewmat_p = cam.viewmat.ptr(); + winmat_p = cam.winmat.ptr(); } -#else - /* TEMP */ - UNUSED_VARS(face_matrix_); - const DRWView *default_view = DRW_view_default_get(); - DRW_view_winmat_get(default_view, winmat.ptr(), false); - DRW_view_viewmat_get(default_view, viewmat.ptr(), false); -#endif main_view_ = DRW_view_create(viewmat_p, winmat_p, nullptr, nullptr, nullptr); sub_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p); render_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p); // dof_.sync(winmat_p, extent_); - // mb_.sync(extent_); - velocity_.sync(); // rt_buffer_opaque_.sync(extent_); // rt_buffer_refract_.sync(extent_); // inst_.hiz_back.view_sync(extent_); // inst_.hiz_front.view_sync(extent_); // inst_.gbuffer.view_sync(extent_); - - combined_tx_.sync(); - postfx_tx_.sync(); } void ShadingView::render() @@ -103,29 +92,25 @@ void ShadingView::render() return; } - /* Query temp textures and create framebuffers. */ - /* HACK: View name should be unique and static. - * With this, we can reuse the same texture across views. 
*/ - DrawEngineType *owner = (DrawEngineType *)name_; - - DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); - - depth_tx_.ensure_2d(GPU_DEPTH24_STENCIL8, extent_); - combined_tx_.acquire(extent_, GPU_RGBA16F, owner); - velocity_.acquire(extent_); - // combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(combined_tx_)); - // prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), - // GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get())); - combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth), GPU_ATTACHMENT_TEXTURE(dtxl->color)); - prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth), - GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get())); + /* Query temp textures and create frame-buffers. */ + RenderBuffers &rbufs = inst_.render_buffers; + rbufs.acquire(extent_); + combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx), + GPU_ATTACHMENT_TEXTURE(rbufs.combined_tx)); + prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx), + GPU_ATTACHMENT_TEXTURE(rbufs.vector_tx)); update_view(); + inst_.hiz_buffer.set_dirty(); + DRW_stats_group_start(name_); - // DRW_view_set_active(render_view_); + DRW_view_set_active(render_view_); + + /* If camera has any motion, compute motion vector in the film pass. Otherwise, we avoid float + * precision issue by setting the motion of all static geometry to 0. */ + float4 clear_velocity = float4(inst_.velocity.camera_has_motion() ? VELOCITY_INVALID : 0.0f); - float4 clear_velocity(VELOCITY_INVALID); GPU_framebuffer_bind(prepass_fb_); GPU_framebuffer_clear_color(prepass_fb_, clear_velocity); /* Alpha stores transmittance. So start at 1. */ @@ -133,7 +118,10 @@ void ShadingView::render() GPU_framebuffer_bind(combined_fb_); GPU_framebuffer_clear_color_depth(combined_fb_, clear_color, 1.0f); - inst_.pipelines.world.render(); + inst_.pipelines.world.render(render_view_new_); + + /* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. 
*/ + inst_.lights.set_view(render_view_new_, extent_); // inst_.pipelines.deferred.render( // render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_); @@ -142,52 +130,36 @@ void ShadingView::render() // inst_.lookdev.render_overlay(view_fb_); - inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, depth_tx_, combined_tx_); + inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx); - // inst_.lights.debug_draw(view_fb_); - // inst_.shadows.debug_draw(view_fb_); + inst_.lights.debug_draw(render_view_new_, combined_fb_); + inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_); - // velocity_.resolve(depth_tx_); - velocity_.resolve(dtxl->depth); + GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx); - // if (inst_.render_passes.vector) { - // inst_.render_passes.vector->accumulate(velocity_.camera_vectors_get(), sub_view_); - // } + inst_.film.accumulate(sub_view_, combined_final_tx); - // GPUTexture *final_radiance_tx = render_post(combined_tx_); + // inst_.shadows.debug_draw(); - // if (inst_.render_passes.combined) { - // inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_); - // } - - // if (inst_.render_passes.depth) { - // inst_.render_passes.depth->accumulate(depth_tx_, sub_view_); - // } + rbufs.release(); + postfx_tx_.release(); DRW_stats_group_end(); - - combined_tx_.release(); - postfx_tx_.release(); - velocity_.release(); } -GPUTexture *ShadingView::render_post(GPUTexture *input_tx) +GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx) { -#if 0 - if (!dof_.postfx_enabled() && !mb_.enabled()) { + if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) { return input_tx; } - /* HACK: View name should be unique and static. - * With this, we can reuse the same texture across views. 
*/ - postfx_tx_.acquire(extent_, GPU_RGBA16F, (void *)name_); + postfx_tx_.acquire(extent_, GPU_RGBA16F); - GPUTexture *velocity_tx = velocity_.view_vectors_get(); GPUTexture *output_tx = postfx_tx_; /* Swapping is done internally. Actual output is set to the next input. */ - dof_.render(depth_tx_, &input_tx, &output_tx); - mb_.render(depth_tx_, velocity_tx, &input_tx, &output_tx); -#endif + inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_); + inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx); + return input_tx; } @@ -197,20 +169,25 @@ void ShadingView::update_view() DRW_view_viewmat_get(main_view_, viewmat.ptr(), false); DRW_view_winmat_get(main_view_, winmat.ptr(), false); + /* TODO(fclem): Mixed-resolution rendering: We need to make sure we render with exactly the same + * distances between pixels to line up render samples and target pixels. + * So if the target resolution is not a multiple of the resolution divisor, we need to make the + * projection window bigger in the +X and +Y directions. */ + /* Anti-Aliasing / Super-Sampling jitter. */ - // float jitter_u = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_U) - 0.5f) / extent_[0]; - // float jitter_v = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_V) - 0.5f) / extent_[1]; + float2 jitter = inst_.film.pixel_jitter_get() / float2(extent_); + /* Transform to NDC space. */ + jitter *= 2.0f; - // window_translate_m4(winmat.ptr(), winmat.ptr(), jitter_u, jitter_v); + window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter)); DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr()); - /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop + /* FIXME(fclem): The offset may be noticeably large and the culling might make object pop * out of the blurring radius. To fix this, use custom enlarged culling matrix. 
*/ - // dof_.jitter_apply(winmat, viewmat); + inst_.depth_of_field.jitter_apply(winmat, viewmat); DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr()); - // inst_.lightprobes.set_view(render_view_, extent_); - // inst_.lights.set_view(render_view_, extent_, !inst_.use_scene_lights()); + render_view_new_.sync(viewmat, winmat); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh index fb74412f557..74e513357cd 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.hh +++ b/source/blender/draw/engines/eevee_next/eevee_view.hh @@ -41,19 +41,13 @@ class ShadingView { /** Matrix to apply to the viewmat. */ const float (*face_matrix_)[4]; - /** Post-FX modules. */ - // DepthOfField dof_; - // MotionBlur mb_; - VelocityView velocity_; - /** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */ // RaytraceBuffer rt_buffer_opaque_; // RaytraceBuffer rt_buffer_refract_; + DepthOfFieldBuffer dof_buffer_; Framebuffer prepass_fb_; Framebuffer combined_fb_; - Texture depth_tx_; - TextureFromPool combined_tx_; TextureFromPool postfx_tx_; /** Main views is created from the camera (or is from the viewport). It is not jittered. */ @@ -63,6 +57,7 @@ class ShadingView { DRWView *sub_view_ = nullptr; /** Same as sub_view_ but has Depth Of Field jitter applied. */ DRWView *render_view_ = nullptr; + View render_view_new_; /** Render size of the view. Can change between scene sample eval. 
*/ int2 extent_ = {-1, -1}; @@ -71,17 +66,17 @@ class ShadingView { public: ShadingView(Instance &inst, const char *name, const float (*face_matrix)[4]) - : inst_(inst), name_(name), face_matrix_(face_matrix), velocity_(inst, name){}; + : inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){}; ~ShadingView(){}; void init(); - void sync(int2 render_extent_); + void sync(); void render(); - GPUTexture *render_post(GPUTexture *input_tx); + GPUTexture *render_postfx(GPUTexture *input_tx); private: void update_view(); @@ -94,7 +89,7 @@ class ShadingView { * * Container for all views needed to render the final image. * We might need up to 6 views for panoramic cameras. - * All views are always available but only enabled for if need. + * All views are always available but only enabled for if needed. * \{ */ class MainView { @@ -109,8 +104,6 @@ class MainView { ShadingView shading_views_4; ShadingView shading_views_5; #define shading_views_ (&shading_views_0) - /** Internal render size. */ - int render_extent_[2]; public: MainView(Instance &inst) @@ -123,15 +116,8 @@ class MainView { { } - void init(const int2 full_extent_) + void init() { - /* TODO(fclem) parameter hidden in experimental. We need to figure out mipmap bias to preserve - * texture crispiness. 
*/ - float resolution_scale = 1.0f; - for (int i = 0; i < 2; i++) { - render_extent_[i] = max_ii(1, roundf(full_extent_[i] * resolution_scale)); - } - for (auto i : IndexRange(6)) { shading_views_[i].init(); } @@ -140,7 +126,7 @@ class MainView { void sync() { for (auto i : IndexRange(6)) { - shading_views_[i].sync(render_extent_); + shading_views_[i].sync(); } } diff --git a/source/blender/draw/engines/eevee_next/eevee_world.cc b/source/blender/draw/engines/eevee_next/eevee_world.cc index b9cb24fe30a..313c0bda42e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_world.cc +++ b/source/blender/draw/engines/eevee_next/eevee_world.cc @@ -42,10 +42,10 @@ DefaultWorldNodeTree::~DefaultWorldNodeTree() MEM_SAFE_FREE(ntree_); } -/* Configure a default nodetree with the given world. */ +/* Configure a default node-tree with the given world. */ bNodeTree *DefaultWorldNodeTree::nodetree_get(::World *wo) { - /* WARNING: This function is not threadsafe. Which is not a problem for the moment. */ + /* WARNING: This function is not thread-safe. Which is not a problem for the moment. */ copy_v3_fl3(color_socket_->value, wo->horr, wo->horg, wo->horb); return ntree_; } @@ -79,7 +79,7 @@ void World::sync() /* TODO(fclem) This should be detected to scene level. */ ::World *orig_world = (::World *)DEG_get_original_id(&bl_world->id); if (assign_if_different(prev_original_world, orig_world)) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } bNodeTree *ntree = (bl_world->nodetree && bl_world->use_nodes) ? 
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl index a65bb7decb6..6fe5fa01fa3 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl @@ -3,6 +3,8 @@ #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl) +#define EEVEE_ATTRIBUTE_LIB + #if defined(MAT_GEOM_MESH) /* -------------------------------------------------------------------- */ @@ -131,7 +133,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = interp.curves_strand_id; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { # ifdef COMMON_HAIR_LIB id = hair_get_base_id(); # endif @@ -282,43 +284,3 @@ vec3 attr_load_uv(vec3 attr) /** \} */ #endif - -/* -------------------------------------------------------------------- */ -/** \name Volume Attribute post - * - * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on - * the engine side. But as of now, the engines are responsible for loading the attributes. - * - * \{ */ - -#if defined(MAT_GEOM_VOLUME) - -float attr_load_temperature_post(float attr) -{ - /* Bring the into standard range without having to modify the grid values */ - attr = (attr > 0.01) ? (attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0; - return attr; -} -vec4 attr_load_color_post(vec4 attr) -{ - /* Density is premultiplied for interpolation, divide it out here. */ - attr.rgb *= safe_rcp(attr.a); - attr.rgb *= drw_volume.color_mul.rgb; - attr.a = 1.0; - return attr; -} - -#else /* Noop for any other surface. 
*/ - -float attr_load_temperature_post(float attr) -{ - return attr; -} -vec4 attr_load_color_post(vec4 attr) -{ - return attr; -} - -#endif - -/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl index f79e9102d76..2611f714b59 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl @@ -143,24 +143,10 @@ vec2 camera_uv_from_view(CameraData cam, vec3 vV) } } -vec2 camera_uv_from_world(CameraData cam, vec3 V) +vec2 camera_uv_from_world(CameraData cam, vec3 P) { - vec3 vV = transform_point(cam.viewmat, V); - switch (cam.type) { - default: - case CAMERA_ORTHO: - return camera_uv_from_view(cam.persmat, false, V); - case CAMERA_PERSP: - return camera_uv_from_view(cam.persmat, true, V); - case CAMERA_PANO_EQUIRECT: - return camera_equirectangular_from_direction(cam, vV); - case CAMERA_PANO_EQUISOLID: - /* ATTR_FALLTHROUGH; */ - case CAMERA_PANO_EQUIDISTANT: - return camera_fisheye_from_direction(cam, vV); - case CAMERA_PANO_MIRROR: - return camera_mirror_ball_from_direction(cam, vV); - } + vec3 vV = transform_direction(cam.viewmat, normalize(P)); + return camera_uv_from_view(cam, vV); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl new file mode 100644 index 00000000000..d5fdaae6fc1 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl @@ -0,0 +1,37 @@ + +/* -------------------------------------------------------------------- */ +/** \name YCoCg + * \{ */ + +vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color) +{ + const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */ + vec3(2, 0, -2), /* Co */ + vec3(-1, 2, -1))); /* Cg */ + return colorspace_tx * rgb_color; +} + +vec4 
colorspace_YCoCg_from_scene_linear(vec4 rgba_color) +{ + return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a); +} + +vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color) +{ + float Y = ycocg_color.x; + float Co = ycocg_color.y; + float Cg = ycocg_color.z; + + vec3 rgb_color; + rgb_color.r = Y + Co - Cg; + rgb_color.g = Y + Cg; + rgb_color.b = Y - Co - Cg; + return rgb_color * 0.25; +} + +vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color) +{ + return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a); +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl new file mode 100644 index 00000000000..e874a6b56ea --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl @@ -0,0 +1,70 @@ +/** Storing/merging and sorting cryptomatte samples. */ + +bool cryptomatte_can_merge_sample(vec2 dst, vec2 src) +{ + if (dst == vec2(0.0, 0.0)) { + return true; + } + if (dst.x == src.x) { + return true; + } + return false; +} + +vec2 cryptomatte_merge_sample(vec2 dst, vec2 src) +{ + return vec2(src.x, dst.y + src.y); +} + +vec4 cryptomatte_false_color(float hash) +{ + uint m3hash = floatBitsToUint(hash); + return vec4(hash, + float(m3hash << 8) / float(0xFFFFFFFFu), + float(m3hash << 16) / float(0xFFFFFFFFu), + 1.0); +} + +void cryptomatte_clear_samples(FilmSample dst) +{ + int layer_len = imageSize(cryptomatte_img).z; + for (int i = 0; i < layer_len; i++) { + imageStore(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0)); + } +} + +void cryptomatte_store_film_sample(FilmSample dst, + int cryptomatte_layer_id, + vec2 crypto_sample, + out vec4 out_color) +{ + if (crypto_sample.y == 0.0) { + return; + } + for (int i = 0; i < film_buf.cryptomatte_samples_len / 2; i++) { + ivec3 img_co = ivec3(dst.texel, cryptomatte_layer_id + i); + vec4 sample_pair = imageLoad(cryptomatte_img, 
img_co); + if (cryptomatte_can_merge_sample(sample_pair.xy, crypto_sample)) { + sample_pair.xy = cryptomatte_merge_sample(sample_pair.xy, crypto_sample); + /* In viewport only one layer is active. */ + /* TODO(jbakker): we are displaying the first sample, but we should display the highest + * weighted one. */ + if (cryptomatte_layer_id + i == 0) { + out_color = cryptomatte_false_color(sample_pair.x); + } + } + else if (cryptomatte_can_merge_sample(sample_pair.zw, crypto_sample)) { + sample_pair.zw = cryptomatte_merge_sample(sample_pair.zw, crypto_sample); + } + else if (i == film_buf.cryptomatte_samples_len / 2 - 1) { + /* TODO(jbakker): New hash detected, but there is no space left to store it. Currently we + * will ignore this sample, but ideally we could replace a sample with a lowest weight. */ + continue; + } + else { + continue; + } + imageStore(cryptomatte_img, img_co, sample_pair); + break; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl new file mode 100644 index 00000000000..99a47c541e9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl @@ -0,0 +1,680 @@ + +/** + * Depth of Field Gather accumulator. + * We currently have only 2 which are very similar. + * One is for the halfres gather passes and the other one for slight in focus regions. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Options. + * \{ */ + +/* Quality options */ +#ifdef DOF_HOLEFILL_PASS +/* No need for very high density for hole_fill. 
*/ +const int gather_ring_count = 3; +const int gather_ring_density = 3; +const int gather_max_density_change = 0; +const int gather_density_change_ring = 1; +#else +const int gather_ring_count = DOF_GATHER_RING_COUNT; +const int gather_ring_density = 3; +const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */ +const int gather_density_change_ring = 1; +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +const float unit_ring_radius = 1.0 / float(gather_ring_count); +const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5); +const float large_kernel_radius = 0.5 + float(gather_ring_count); +const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring); +/* NOTE(fclem) the bias is reducing issues with density change visible transition. */ +const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius; +const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1); +const float coc_radius_error = 2.0; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather common. + * \{ */ + +struct DofGatherData { + vec4 color; + float weight; + float dist; /* TODO remove */ + /* For scatter occlusion. */ + float coc; + float coc_sqr; + /* For ring bucket merging. */ + float transparency; + + float layer_opacity; +}; + +#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + +/* Intersection with the center of the kernel. */ +float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier) +{ + if (no_smooth_intersection) { + return step(0.0, (abs(coc) - distance_from_center)); + } + else { + /* (Slide 64). 
*/ + return saturate((abs(coc) - distance_from_center) * intersection_multiplier + 0.5); + } +} + +/* Returns weight of the sample for the outer bucket (containing previous + * rings). */ +float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring) +{ + /* First ring has nothing to be mixed against. */ + if (first_ring) { + return 0.0; + } + return saturate(coc - bordering_radius); +} + +void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight) +{ + sample_data.color *= weight; + sample_data.coc *= weight; + sample_data.coc_sqr *= weight; + sample_data.weight *= weight; +} + +void dof_gather_accumulate_sample(DofGatherData sample_data, + float weight, + inout DofGatherData accum_data) +{ + accum_data.color += sample_data.color * weight; + accum_data.coc += sample_data.coc * weight; + accum_data.coc_sqr += sample_data.coc * (sample_data.coc * weight); + accum_data.weight += weight; +} + +void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2], + float bordering_radius, + float intersection_multiplier, + bool first_ring, + const bool do_fast_gather, + const bool is_foreground, + inout DofGatherData ring_data, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + for (int i = 0; i < 2; i++) { + dof_gather_accumulate_sample(pair_data[i], 1.0, accum_data); + accum_data.layer_opacity += 1.0; + } + return; + } + +#if 0 + const float mirroring_threshold = -dof_layer_threshold - dof_layer_offset; + /* TODO(fclem) Promote to parameter? dither with Noise? 
*/ + const float mirroring_min_distance = 15.0; + if (pair_data[0].coc < mirroring_threshold && + (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) { + pair_data[1].coc = pair_data[0].coc; + } + else if (pair_data[1].coc < mirroring_threshold && + (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) { + pair_data[0].coc = pair_data[1].coc; + } +#endif + + for (int i = 0; i < 2; i++) { + float sample_weight = dof_sample_weight(pair_data[i].coc); + float layer_weight = dof_layer_weight(pair_data[i].coc, is_foreground); + float inter_weight = dof_intersection_weight( + pair_data[i].coc, pair_data[i].dist, intersection_multiplier); + float weight = inter_weight * layer_weight * sample_weight; + + /** + * If a CoC is larger than bordering radius we accumulate it to the general accumulator. + * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion. + **/ + float accum_weight = dof_gather_accum_weight(pair_data[i].coc, bordering_radius, first_ring); + dof_gather_accumulate_sample(pair_data[i], weight * accum_weight, accum_data); + dof_gather_accumulate_sample(pair_data[i], weight * (1.0 - accum_weight), ring_data); + + accum_data.layer_opacity += layer_weight; + + if (is_foreground) { + ring_data.transparency += 1.0 - inter_weight * layer_weight; + } + else { + float coc = is_foreground ? -pair_data[i].coc : pair_data[i].coc; + ring_data.transparency += saturate(coc - bordering_radius); + } + } +} + +void dof_gather_accumulate_sample_ring(DofGatherData ring_data, + int sample_count, + bool first_ring, + const bool do_fast_gather, + /* accum_data occludes the ring_data if true. */ + const bool reversed_occlusion, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + /* Do nothing as ring_data contains nothing. All samples are already in + * accum_data. */ + return; + } + + if (first_ring) { + /* Layer opacity is directly accumulated into accum_data data. 
*/ + accum_data.color = ring_data.color; + accum_data.coc = ring_data.coc; + accum_data.coc_sqr = ring_data.coc_sqr; + accum_data.weight = ring_data.weight; + + accum_data.transparency = ring_data.transparency / float(sample_count); + return; + } + + if (ring_data.weight == 0.0) { + return; + } + + float ring_avg_coc = ring_data.coc / ring_data.weight; + float accum_avg_coc = accum_data.coc / accum_data.weight; + + /* Smooth test to set opacity to see if the ring average coc occludes the + * accumulation. Test is reversed to be multiplied against opacity. */ + float ring_occlu = saturate(accum_avg_coc - ring_avg_coc); + /* The bias here is arbitrary. Seems to avoid weird looking foreground in most + * cases. We might need to make it a parameter or find a relative bias. */ + float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0); + + if (is_resolve) { + ring_occlu = accum_occlu = 0.0; + } + + if (no_gather_occlusion) { + ring_occlu = 0.0; + accum_occlu = 0.0; + } + + /* (Slide 40) */ + float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count)); + float accum_opacity = 1.0 - accum_data.transparency; + + if (reversed_occlusion) { + /* Accum_data occludes the ring. */ + float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu; + float one_minus_alpha = 1.0 - alpha; + + accum_data.color += ring_data.color * one_minus_alpha; + accum_data.coc += ring_data.coc * one_minus_alpha; + accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha; + accum_data.weight += ring_data.weight * one_minus_alpha; + + accum_data.transparency *= 1.0 - ring_opacity; + } + else { + /* Ring occludes the accum_data (Same as reference). */ + float alpha = (accum_data.weight == 0.0) ? 
1.0 : (ring_opacity * ring_occlu); + float one_minus_alpha = 1.0 - alpha; + + accum_data.color = accum_data.color * one_minus_alpha + ring_data.color; + accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc; + accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr; + accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight; + } +} + +/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for + * slightfocus gather. */ +/* This should be replaced by web_sample_count_get() but doing so is breaking other things. */ +int dof_gather_total_sample_count(const int ring_count, const int ring_density) +{ + return (ring_count * ring_count - ring_count) * ring_density + 1; +} + +void dof_gather_accumulate_center_sample(DofGatherData center_data, + float bordering_radius, + int i_radius, + const bool do_fast_gather, + const bool is_foreground, + const bool is_resolve, + inout DofGatherData accum_data) +{ + float layer_weight = dof_layer_weight(center_data.coc, is_foreground); + float sample_weight = dof_sample_weight(center_data.coc); + float weight = layer_weight * sample_weight; + float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false); + + if (do_fast_gather) { + /* Hope for the compiler to optimize the above. */ + layer_weight = 1.0; + sample_weight = 1.0; + accum_weight = 1.0; + weight = 1.0; + } + + center_data.transparency = 1.0 - weight; + + dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data); + + if (!do_fast_gather) { + if (is_resolve) { + /* NOTE(fclem): Hack to smooth transition to full in-focus opacity. 
*/ + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, + DOF_SLIGHT_FOCUS_DENSITY); + float fac = saturate(1.0 - abs(center_data.coc) / float(dof_layer_threshold)); + accum_data.layer_opacity += float(total_sample_count) * fac * fac; + } + accum_data.layer_opacity += layer_weight; + + /* Logic of dof_gather_accumulate_sample(). */ + weight *= (1.0 - accum_weight); + center_data.coc_sqr = center_data.coc * (center_data.coc * weight); + center_data.color *= weight; + center_data.coc *= weight; + center_data.weight = weight; + + if (is_foreground && !is_resolve) { + /* Reduce issue with closer foreground over distant foreground. */ + float ring_area = sqr(bordering_radius); + dof_gather_ammend_weight(center_data, ring_area); + } + + /* Accumulate center as its own ring. */ + dof_gather_accumulate_sample_ring( + center_data, 1, false, do_fast_gather, is_foreground, accum_data); + } +} + +int dof_gather_total_sample_count_with_density_change(const int ring_count, + const int ring_density, + int density_change) +{ + int sample_count_per_density_change = dof_gather_total_sample_count(ring_count, ring_density) - + dof_gather_total_sample_count( + ring_count - gather_density_change_ring, ring_density); + + return dof_gather_total_sample_count(ring_count, ring_density) + + sample_count_per_density_change * density_change; +} + +void dof_gather_accumulate_resolve(int total_sample_count, + DofGatherData accum_data, + out vec4 out_col, + out float out_weight, + out vec2 out_occlusion) +{ + float weight_inv = safe_rcp(accum_data.weight); + out_col = accum_data.color * weight_inv; + out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv; + + if (is_foreground) { + out_weight = 1.0 - accum_data.transparency; + } + else if (accum_data.weight > 0.0) { + out_weight = accum_data.layer_opacity / float(total_sample_count); + } + else { + out_weight = 0.0; + } + /* Gathering may not accumulate to 1.0 alpha because of float precision. 
*/ + if (out_weight > 0.99) { + out_weight = 1.0; + } + else if (out_weight < 0.01) { + out_weight = 0.0; + } + /* Same thing for alpha channel. */ + if (out_col.a > 0.993) { + out_col.a = 1.0; + } + else if (out_col.a < 0.003) { + out_col.a = 0.0; + } +} + +float dof_load_gather_coc(sampler2D gather_input_coc_tx, vec2 uv, float lod) +{ + float coc = textureLod(gather_input_coc_tx, uv, lod).r; + /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */ + return coc * 0.5; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Common Gather accumulator. + * \{ */ + +/* Radii needs to be halfres CoC sizes. */ +bool dof_do_density_change(float base_radius, float min_intersectable_radius) +{ + /* Reduce artifact for very large blur. */ + min_intersectable_radius *= 0.1; + + bool need_new_density = (base_radius * unit_ring_radius > min_intersectable_radius); + bool larger_than_min_density = (base_radius * radius_downscale_factor > + float(gather_ring_count)); + + return need_new_density && larger_than_min_density; +} + +void dof_gather_init(float base_radius, + vec2 noise, + out vec2 center_co, + out float lod, + out float intersection_multiplier) +{ + /* Jitter center half a ring to reduce undersampling. */ + vec2 jitter_ofs = 0.499 * sample_disk(noise); + if (DOF_BOKEH_TEXTURE) { + jitter_ofs *= dof_buf.bokeh_anisotropic_scale; + } + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + center_co = frag_coord + jitter_ofs * base_radius * unit_sample_radius; + + /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving out + * of focus shapes. */ + const float lod_bias = -2.0; + lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0); + + if (no_gather_mipmaps) { + lod = 0.0; + } + /* (Slide 64). 
*/ + intersection_multiplier = pow(0.5, lod); +} + +void dof_gather_accumulator(sampler2D color_tx, + sampler2D color_bilinear_tx, + sampler2D coc_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro. */ + float base_radius, + float min_intersectable_radius, + const bool do_fast_gather, + const bool do_density_change, + out vec4 out_color, + out float out_weight, + out vec2 out_occlusion) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy); + vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U); + vec2 noise = no_gather_random ? vec2(0.0, 0.0) : + vec2(interlieved_gradient_noise(frag_coord, 0, noise_offset.x), + interlieved_gradient_noise(frag_coord, 1, noise_offset.y)); + + if (!do_fast_gather) { + /* Jitter the radius to reduce noticeable density changes. */ + base_radius += noise.x * unit_ring_radius * base_radius; + } + else { + /* Jittering the radius more than we need means we are going to feather the bokeh shape half a + * ring. So we need to compensate for fast gather that does not check CoC intersection. */ + base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius; + } + /* TODO(fclem) another seed? For now Cranly-Partterson rotation with golden ratio. */ + noise.x = fract(noise.x * 6.1803398875); + + float lod, isect_mul; + vec2 center_co; + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + + bool first_ring = true; + + DofGatherData accum_data = GATHER_DATA_INIT; + + int density_change = 0; + for (int ring = gather_ring_count; ring > 0; ring--) { + int sample_pair_count = gather_ring_density * ring; + + float step_rot = M_PI / float(sample_pair_count); + mat2 step_rot_mat = rot2_from_angle(step_rot); + + float angle_offset = noise.y * step_rot; + vec2 offset = vec2(cos(angle_offset), sin(angle_offset)); + + float ring_radius = float(ring) * unit_sample_radius * base_radius; + + /* Slide 38. 
*/ + float bordering_radius = ring_radius + + (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + DofGatherData ring_data = GATHER_DATA_INIT; + for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) { + offset = step_rot_mat * offset; + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 offset_co = ((i == 0) ? offset : -offset); + if (DOF_BOKEH_TEXTURE) { + /* Scaling to 0.25 for speed. Improves texture cache hit. */ + offset_co = texture(bkh_lut_tx, offset_co * 0.25 + 0.5).rg; + offset_co *= (is_foreground) ? -dof_buf.bokeh_anisotropic_scale : + dof_buf.bokeh_anisotropic_scale; + } + vec2 sample_co = center_co + offset_co * ring_radius; + vec2 sample_uv = sample_co * dof_buf.gather_uv_fac; + if (do_fast_gather) { + pair_data[i].color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + pair_data[i].color = textureLod(color_tx, sample_uv, lod); + } + pair_data[i].coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + pair_data[i].dist = ring_radius; + } + + dof_gather_accumulate_sample_pair(pair_data, + bordering_radius, + isect_mul, + first_ring, + do_fast_gather, + is_foreground, + ring_data, + accum_data); + } + + if (is_foreground) { + /* Reduce issue with closer foreground over distant foreground. */ + /* TODO(fclem) this seems to not be completely correct as the issue remains. 
*/ + float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) - + sqr(float(ring) - 0.5 + coc_radius_error)) * + sqr(base_radius * unit_sample_radius); + dof_gather_ammend_weight(ring_data, ring_area); + } + + dof_gather_accumulate_sample_ring( + ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data); + + first_ring = false; + + if (do_density_change && (ring == change_density_at_ring) && + (density_change < gather_max_density_change)) { + if (dof_do_density_change(base_radius, min_intersectable_radius)) { + base_radius *= radius_downscale_factor; + ring += gather_density_change_ring; + /* We need to account for the density change in the weights (slide 62). + * For that multiply old kernel data by its area divided by the new kernel area. */ + const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor); + /* Samples are already weighted per ring in foreground pass. */ + if (!is_foreground) { + dof_gather_ammend_weight(accum_data, outer_rings_weight); + } + /* Re-init kernel position & sampling parameters. */ + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + density_change++; + } + } + } + + { + /* Center sample. */ + vec2 sample_uv = center_co * dof_buf.gather_uv_fac; + DofGatherData center_data; + if (do_fast_gather) { + center_data.color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + center_data.color = textureLod(color_tx, sample_uv, lod); + } + center_data.coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + center_data.dist = 0.0; + + /* Slide 38. 
*/ + float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, 0, do_fast_gather, is_foreground, false, accum_data); + } + + int total_sample_count = dof_gather_total_sample_count_with_density_change( + gather_ring_count, gather_ring_density, density_change); + dof_gather_accumulate_resolve( + total_sample_count, accum_data, out_color, out_weight, out_occlusion); + + if (debug_gather_perf && density_change > 0) { + float fac = saturate(float(density_change) / float(10.0)); + out_color.rgb = avg(out_color.rgb) * neon_gradient(fac); + } + if (debug_gather_perf && do_fast_gather) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + + /* Output premultiplied color so we can use bilinear sampler in resolve pass. */ + out_color *= out_weight; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Slight focus accumulator. + * + * The full pixel neighborhood is gathered. + * \{ */ + +void dof_slight_focus_gather(sampler2D depth_tx, + sampler2D color_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */ + float radius, + out vec4 out_color, + out float out_weight, + out float out_center_coc) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U); + vec2 noise = no_gather_random ? vec2(0.0) : + vec2(interlieved_gradient_noise(frag_coord, 3, noise_offset.x), + interlieved_gradient_noise(frag_coord, 5, noise_offset.y)); + + DofGatherData fg_accum = GATHER_DATA_INIT; + DofGatherData bg_accum = GATHER_DATA_INIT; + + int i_radius = clamp(int(radius), 0, int(dof_layer_threshold)); + + const float sample_count_max = float(DOF_SLIGHT_FOCUS_SAMPLE_MAX); + /* Scale by search area. 
*/ + float sample_count = sample_count_max * saturate(sqr(radius) / sqr(dof_layer_threshold)); + + bool first_ring = true; + + for (float s = 0.0; s < sample_count; s++) { + vec2 rand2 = fract(hammersley_2d(s, sample_count) + noise); + vec2 offset = sample_disk(rand2); + float ring_dist = sqrt(rand2.y); + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 sample_offset = ((i == 0) ? offset : -offset); + /* OPTI: could precompute the factor. */ + vec2 sample_uv = (frag_coord + sample_offset) / vec2(textureSize(depth_tx, 0)); + float depth = textureLod(depth_tx, sample_uv, 0.0).r; + pair_data[i].coc = dof_coc_from_depth(dof_buf, sample_uv, depth); + pair_data[i].color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + pair_data[i].dist = ring_dist; + if (DOF_BOKEH_TEXTURE) { + /* Contains subpixel distance to bokeh shape. */ + ivec2 lut_texel = ivec2(round(sample_offset)) + dof_max_slight_focus_radius; + pair_data[i].dist = texelFetch(bkh_lut_tx, lut_texel, 0).r; + } + pair_data[i].coc = clamp(pair_data[i].coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + } + + float bordering_radius = ring_dist + 0.5; + const float isect_mul = 1.0; + DofGatherData bg_ring = GATHER_DATA_INIT; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum); + /* Treat each sample as a ring. */ + dof_gather_accumulate_sample_ring(bg_ring, 2, first_ring, false, false, bg_accum); + + if (DOF_BOKEH_TEXTURE) { + /* Swap distances in order to flip bokeh shape for foreground. */ + float tmp = pair_data[0].dist; + pair_data[0].dist = pair_data[1].dist; + pair_data[1].dist = tmp; + } + DofGatherData fg_ring = GATHER_DATA_INIT; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum); + /* Treat each sample as a ring. 
*/ + dof_gather_accumulate_sample_ring(fg_ring, 2, first_ring, false, true, fg_accum); + + first_ring = false; + } + + /* Center sample. */ + vec2 sample_uv = frag_coord / vec2(textureSize(depth_tx, 0)); + DofGatherData center_data; + center_data.color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + center_data.coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + center_data.coc = clamp(center_data.coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + center_data.dist = 0.0; + + out_center_coc = center_data.coc; + + /* Slide 38. */ + float bordering_radius = 0.5; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, true, true, fg_accum); + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, false, true, bg_accum); + + vec4 bg_col, fg_col; + float bg_weight, fg_weight; + vec2 unused_occlusion; + + int total_sample_count = int(sample_count) * 2 + 1; + dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); + dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); + + /* Fix weighting issues on perfectly focus to slight focus transitioning areas. 
*/ + if (abs(center_data.coc) < 0.5) { + bg_col = center_data.color; + bg_weight = 1.0; + } + + /* Alpha Over */ + float alpha = 1.0 - fg_weight; + out_weight = bg_weight * alpha + fg_weight; + out_color = bg_col * bg_weight * alpha + fg_col * fg_weight; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl new file mode 100644 index 00000000000..26a597b04e8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl @@ -0,0 +1,55 @@ + +/** + * Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or + * the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not + * a perfect circle. + * We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh + * as it is way more complex and expensive to do. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 gather_uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) / float(DOF_BOKEH_LUT_SIZE)); + /* Center uv in range [-1..1]. */ + gather_uv = gather_uv * 2.0 - 1.0; + + vec2 slight_focus_texel = vec2(gl_GlobalInvocationID.xy) - float(dof_max_slight_focus_radius); + + float radius = length(gather_uv); + + if (dof_buf.bokeh_blades > 0.0) { + /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. 
*/ + float theta = atan(gather_uv.y, gather_uv.x) + M_2PI; + float r = length(gather_uv); + + radius /= circle_to_polygon_radius(dof_buf.bokeh_blades, theta - dof_buf.bokeh_rotation); + + float theta_new = circle_to_polygon_angle(dof_buf.bokeh_blades, theta); + float r_new = circle_to_polygon_radius(dof_buf.bokeh_blades, theta_new); + + theta_new -= dof_buf.bokeh_rotation; + + gather_uv = r_new * vec2(-cos(theta_new), sin(theta_new)); + + { + /* Slight focus distance */ + slight_focus_texel *= dof_buf.bokeh_anisotropic_scale_inv; + float theta = atan(slight_focus_texel.y, -slight_focus_texel.x) + M_2PI; + slight_focus_texel /= circle_to_polygon_radius(dof_buf.bokeh_blades, + theta + dof_buf.bokeh_rotation); + } + } + else { + gather_uv *= safe_rcp(length(gather_uv)); + } + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + /* For gather store the normalized UV. */ + imageStore(out_gather_lut_img, texel, gather_uv.xyxy); + /* For scatter store distance. LUT will be scaled by COC. */ + imageStore(out_scatter_lut_img, texel, vec4(radius)); + /* For slight focus gather store pixel perfect distance. */ + imageStore(out_resolve_lut_img, texel, vec4(length(slight_focus_texel))); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl new file mode 100644 index 00000000000..3d45f285da9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl @@ -0,0 +1,32 @@ + +/** + * Downsample pass: CoC aware downsample to quarter resolution. + * + * Pretty much identical to the setup pass but get CoC from buffer. + * Also does not weight luma for the bilateral weights. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 halfres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 halfres pixels. 
*/ + vec2 quad_center = vec2(gl_GlobalInvocationID * 2 + 1) * halfres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size; + colors[i] = textureLod(color_tx, sample_uv, 0.0); + cocs[i] = textureLod(coc_tx, sample_uv, 0.0).r; + } + + vec4 weights = dof_bilateral_coc_weights(cocs); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + vec4 out_color = weighted_sum_array(colors, weights); + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl new file mode 100644 index 00000000000..49c93ca63cd --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl @@ -0,0 +1,163 @@ + +/** + * Gather Filter pass: Filter the gather pass result to reduce noise. + * + * This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if + * cheaper. + */ + +struct FilterSample { + vec4 color; + float weight; +}; + +/* -------------------------------------------------------------------- */ +/** \name Pixel cache. + * \{ */ + +const uint cache_size = gl_WorkGroupSize.x + 2; +shared vec4 color_cache[cache_size][cache_size]; +shared float weight_cache[cache_size][cache_size]; + +void cache_init() +{ + /** + * Load enough values into LDS to perform the filter. + * + * ┌──────────────────────────────┐ + * │ │ < Border texels that needs to be loaded. + * │ x x x x x x x x │ ─┐ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ Thread Group Size 8x8. + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ ─┘ + * │ L L L L L │ < Border texels that needs to be loaded. 
+ * └──────────────────────────────┘ + * └───────────┘ + * Load using 5x5 threads. + */ + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1; + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) { + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1); + + color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0); + weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r; + } + } + } + barrier(); +} + +FilterSample cache_sample(int x, int y) +{ + return FilterSample(color_cache[y][x], weight_cache[y][x]); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Median filter + * From: + * Implementing Median Filters in XC4000E FPGAs + * JOHN L. SMITH, Univision Technologies Inc., Billerica, MA + * http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf + * Figure 1 + * \{ */ + +FilterSample filter_min(FilterSample a, FilterSample b) +{ + return FilterSample(min(a.color, b.color), min(a.weight, b.weight)); +} + +FilterSample filter_max(FilterSample a, FilterSample b) +{ + return FilterSample(max(a.color, b.color), max(a.weight, b.weight)); +} + +FilterSample filter_min(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(min(a.color, min(c.color, b.color)), min(a.weight, min(c.weight, b.weight))); +} + +FilterSample filter_max(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(max(a.color, max(c.color, b.color)), max(a.weight, max(c.weight, b.weight))); +} + +FilterSample filter_median(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. 
*/ + FilterSample l1 = filter_min(s2, s3); + FilterSample h1 = filter_max(s2, s3); + FilterSample h2 = filter_max(s1, l1); + FilterSample l3 = filter_min(h2, h1); + return l3; +} + +struct FilterLmhResult { + FilterSample low; + FilterSample median; + FilterSample high; +}; + +FilterLmhResult filter_lmh(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. */ + FilterSample h1 = filter_max(s2, s3); + FilterSample l1 = filter_min(s2, s3); + + FilterSample h2 = filter_max(s1, l1); + FilterSample l2 = filter_min(s1, l1); + + FilterSample h3 = filter_max(h2, h1); + FilterSample l3 = filter_min(h2, h1); + + FilterLmhResult result; + result.low = l2; + result.median = l3; + result.high = h3; + + return result; +} + +/** \} */ + +void main() +{ + /** + * NOTE: We can **NOT** optimize by discarding some tiles as the result is sampled using bilinear + * filtering in the resolve pass. Not outputting to a tile means that border texels have + * undefined value and tile border will be noticeable in the final image. + */ + + cache_init(); + + ivec2 texel = ivec2(gl_LocalInvocationID.xy); + + FilterLmhResult rows[3]; + for (int y = 0; y < 3; y++) { + rows[y] = filter_lmh(cache_sample(texel.x + 0, texel.y + y), + cache_sample(texel.x + 1, texel.y + y), + cache_sample(texel.x + 2, texel.y + y)); + } + /* Left nodes. */ + FilterSample high = filter_max(rows[0].low, rows[1].low, rows[2].low); + /* Right nodes. */ + FilterSample low = filter_min(rows[0].high, rows[1].high, rows[2].high); + /* Center nodes. */ + FilterSample median = filter_median(rows[0].median, rows[1].median, rows[2].median); + /* Last bottom nodes. 
*/ + median = filter_median(low, median, high); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, median.color); + imageStore(out_weight_img, out_texel, vec4(median.weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl new file mode 100644 index 00000000000..cf8dd7a36e6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl @@ -0,0 +1,99 @@ + +/** + * Gather pass: Convolve foreground and background parts in separate passes. + * + * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene + *color. A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + * + * Outputs: + * - Color * Weight, Weight, Occlusion 'CoC' Depth (mean and variance) + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius, min_radius, min_intersectable_radius; + bool can_early_out; + if (is_foreground) { + base_radius = -coc_tile.fg_min_coc; + min_radius = -coc_tile.fg_max_coc; + min_intersectable_radius = -coc_tile.fg_max_intersectable_coc; + can_early_out = !prediction.do_foreground; + } + else { + base_radius = coc_tile.bg_max_coc; + min_radius = coc_tile.bg_min_coc; + min_intersectable_radius = coc_tile.bg_min_intersectable_coc; + can_early_out = !prediction.do_background; + } + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half 
resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color; + float out_weight; + vec2 out_occlusion; + + if (can_early_out) { + out_color = vec4(0.0); + out_weight = 0.0; + out_occlusion = vec2(0.0, 0.0); + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + out_occlusion); + } + else if (do_density_change) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + true, + out_color, + out_weight, + out_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + out_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); + imageStore(out_occlusion_img, out_texel, out_occlusion.xyxy); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl new file mode 100644 index 00000000000..5cdabbc2d4b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl @@ -0,0 +1,70 @@ + +/** + * Holefill pass: Gather background parts where foreground is present. + * + * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene + *color. A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. 
We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius = -coc_tile.fg_min_coc; + float min_radius = -coc_tile.fg_max_coc; + float min_intersectable_radius = dof_tile_large_coc; + bool can_early_out = !prediction.do_hole_fill; + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color = vec4(0.0); + float out_weight = 0.0; + vec2 unused_occlusion = vec2(0.0, 0.0); + + if (can_early_out) { + /* Early out. */ + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + unused_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + unused_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl new file mode 100644 index 00000000000..f89da641446 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl @@ -0,0 +1,327 @@ + +/** + * Depth of Field utils. 
+ **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +#ifndef DOF_SLIGHT_FOCUS_DENSITY +# define DOF_SLIGHT_FOCUS_DENSITY 2 +#endif + +#ifdef DOF_RESOLVE_PASS +const bool is_resolve = true; +#else +const bool is_resolve = false; +#endif +#ifdef DOF_FOREGROUND_PASS +const bool is_foreground = DOF_FOREGROUND_PASS; +#else +const bool is_foreground = false; +#endif +/* Debug options */ +const bool debug_gather_perf = false; +const bool debug_scatter_perf = false; +const bool debug_resolve_perf = false; + +const bool no_smooth_intersection = false; +const bool no_gather_occlusion = false; +const bool no_gather_mipmaps = false; +const bool no_gather_random = false; +const bool no_gather_filtering = false; +const bool no_scatter_occlusion = false; +const bool no_scatter_pass = false; +const bool no_foreground_pass = false; +const bool no_background_pass = false; +const bool no_slight_focus_pass = false; +const bool no_focus_pass = false; +const bool no_hole_fill_pass = false; + +/* Distribute weights between near/slightfocus/far fields (slide 117). */ +const float dof_layer_threshold = 4.0; +/* Make sure it overlaps. */ +const float dof_layer_offset_fg = 0.5 + 1.0; +/* Extra offset for convolution layers to avoid light leaking from background. */ +const float dof_layer_offset = 0.5 + 0.5; + +const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS; + +const vec2 quad_offsets[4] = vec2[4]( + vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5)); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Weighting and downsampling utils. + * \{ */ + +float dof_hdr_color_weight(vec4 color) +{ + /* Very fast "luma" weighting. */ + float luma = (color.g * 2.0) + (color.r + color.b); + /* TODO(fclem) Pass correct exposure. 
*/ + const float exposure = 1.0; + return 1.0 / (luma * exposure + 4.0); +} + +float dof_coc_select(vec4 cocs) +{ + /* Select biggest coc. */ + float selected_coc = cocs.x; + if (abs(cocs.y) > abs(selected_coc)) { + selected_coc = cocs.y; + } + if (abs(cocs.z) > abs(selected_coc)) { + selected_coc = cocs.z; + } + if (abs(cocs.w) > abs(selected_coc)) { + selected_coc = cocs.w; + } + return selected_coc; +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_coc_weights(vec4 cocs) +{ + float chosen_coc = dof_coc_select(cocs); + + const float scale = 4.0; /* TODO(fclem) revisit. */ + /* NOTE: The difference between the cocs should be inside a abs() function, + * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). */ + return saturate(1.0 - (chosen_coc - cocs) * scale); +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_color_weights(vec4 colors[4]) +{ + vec4 weights; + for (int i = 0; i < 4; i++) { + weights[i] = dof_hdr_color_weight(colors[i]); + } + return weights; +} + +/* Returns signed Circle of confusion radius (in pixel) based on depth buffer value [0..1]. */ +float dof_coc_from_depth(DepthOfFieldData dof_data, vec2 uv, float depth) +{ + if (is_panoramic(dof_data.camera_type)) { + /* Use radial depth. */ + depth = -length(get_view_space_from_depth(uv, depth)); + } + else { + depth = get_view_z_from_depth(depth); + } + return coc_radius_from_camera_depth(dof_data, depth); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather & Scatter Weighting + * \{ */ + +float dof_layer_weight(float coc, const bool is_foreground) +{ + /* NOTE: These are fullres pixel CoC value. */ + if (is_resolve) { + return saturate(-abs(coc) + dof_layer_threshold + dof_layer_offset) * + float(is_foreground ? (coc <= 0.5) : (coc > -0.5)); + } + else { + coc *= 2.0; /* Account for half pixel gather. 
*/ + float threshold = dof_layer_threshold - + ((is_foreground) ? dof_layer_offset_fg : dof_layer_offset); + return saturate(((is_foreground) ? -coc : coc) - threshold); + } +} +vec4 dof_layer_weight(vec4 coc) +{ + /* NOTE: Used for scatter pass which already flipped the sign correctly. */ + coc *= 2.0; /* Account for half pixel gather. */ + return saturate(coc - dof_layer_threshold + dof_layer_offset); +} + +/* NOTE: This is halfres CoC radius. */ +float dof_sample_weight(float coc) +{ +#if 1 /* Optimized */ + return min(1.0, 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(min_coc, abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} +vec4 dof_sample_weight(vec4 coc) +{ +#if 1 /* Optimized */ + return min(vec4(1.0), 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(vec4(min_coc), abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle of Confusion tiles + * \{ */ + +struct CocTile { + float fg_min_coc; + float fg_max_coc; + float fg_max_intersectable_coc; + float bg_min_coc; + float bg_max_coc; + float bg_min_intersectable_coc; +}; + +/* WATCH: Might have to change depending on the texture format. */ +const float dof_tile_large_coc = 1024.0; + +/* Init a CoC tile for reduction algorithms. 
*/ +CocTile dof_coc_tile_init() +{ + CocTile tile; + tile.fg_min_coc = 0.0; + tile.fg_max_coc = -dof_tile_large_coc; + tile.fg_max_intersectable_coc = dof_tile_large_coc; + tile.bg_min_coc = dof_tile_large_coc; + tile.bg_max_coc = 0.0; + tile.bg_min_intersectable_coc = dof_tile_large_coc; + return tile; +} + +CocTile dof_coc_tile_unpack(vec3 fg, vec3 bg) +{ + CocTile tile; + tile.fg_min_coc = -fg.x; + tile.fg_max_coc = -fg.y; + tile.fg_max_intersectable_coc = -fg.z; + tile.bg_min_coc = bg.x; + tile.bg_max_coc = bg.y; + tile.bg_min_intersectable_coc = bg.z; + return tile; +} + +/* WORKAROUND(fclem): GLSL compilers differ in what qualifiers are required to pass images as + * parameters. Workaround by using defines. */ +#define dof_coc_tile_load(tiles_fg_img_, tiles_bg_img_, texel_) \ + dof_coc_tile_unpack( \ + imageLoad(tiles_fg_img_, clamp(texel_, ivec2(0), imageSize(tiles_fg_img_) - 1)).xyz, \ + imageLoad(tiles_bg_img_, clamp(texel_, ivec2(0), imageSize(tiles_bg_img_) - 1)).xyz) + +void dof_coc_tile_pack(CocTile tile, out vec3 out_fg, out vec3 out_bg) +{ + out_fg.x = -tile.fg_min_coc; + out_fg.y = -tile.fg_max_coc; + out_fg.z = -tile.fg_max_intersectable_coc; + out_bg.x = tile.bg_min_coc; + out_bg.y = tile.bg_max_coc; + out_bg.z = tile.bg_min_intersectable_coc; +} + +#define dof_coc_tile_store(tiles_fg_img_, tiles_bg_img_, texel_out_, tile_data_) \ + if (true) { \ + vec3 out_fg; \ + vec3 out_bg; \ + dof_coc_tile_pack(tile_data_, out_fg, out_bg); \ + imageStore(tiles_fg_img_, texel_out_, out_fg.xyzz); \ + imageStore(tiles_bg_img_, texel_out_, out_bg.xyzz); \ + } + +bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground) +{ + float min_weight = dof_layer_weight((is_foreground) ? -min_absolute_coc : min_absolute_coc, + is_foreground); + if (min_weight < 1.0) { + return false; + } + /* FIXME(fclem): This is a workaround to fast gather triggering too early. 
Since we use custom + * opacity mask, the opacity is not given to be 100% even for after normal threshold. */ + if (is_foreground && min_absolute_coc < dof_layer_threshold) { + return false; + } + return (max_absolute_coc - min_absolute_coc) < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc); +} + +struct CocTilePrediction { + bool do_foreground; + bool do_slight_focus; + bool do_focus; + bool do_background; + bool do_hole_fill; +}; + +/** + * Using the tile CoC infos, predict which convolutions are required and the ones that can be + * skipped. + */ +CocTilePrediction dof_coc_tile_prediction_get(CocTile tile) +{ + /* Based on tile value, predict what pass we need to load. */ + CocTilePrediction predict; + + predict.do_foreground = (-tile.fg_min_coc > dof_layer_threshold - dof_layer_offset_fg); + bool fg_fully_opaque = predict.do_foreground && + dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true); + predict.do_background = !fg_fully_opaque && + (tile.bg_max_coc > dof_layer_threshold - dof_layer_offset); + bool bg_fully_opaque = predict.do_background && + dof_do_fast_gather(-tile.bg_max_coc, tile.bg_min_coc, false); + predict.do_hole_fill = !fg_fully_opaque && -tile.fg_min_coc > 0.0; + predict.do_focus = !fg_fully_opaque; + predict.do_slight_focus = !fg_fully_opaque; + +#if 0 /* Debug */ + predict.do_foreground = predict.do_background = predict.do_hole_fill = true; +#endif + return predict; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gathering + * \{ */ + +/** + * Generate samples in a square pattern with the ring radius. X is the center tile. + * + * Dist1 Dist2 + * 6 5 4 3 2 + * 3 2 1 7 1 + * . X 0 . X 0 + * . . . . . + * . . . . . + * + * Samples are expected to be mirrored to complete the pattern. 
+ **/ +ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id) +{ + ivec2 offset; + if (sample_id < ring_distance) { + offset.x = ring_distance; + offset.y = sample_id; + } + else if (sample_id < ring_distance * 3) { + offset.x = ring_distance - sample_id + ring_distance; + offset.y = ring_distance; + } + else { + offset.x = -ring_distance; + offset.y = ring_distance - sample_id + 3 * ring_distance; + } + return offset; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl new file mode 100644 index 00000000000..a6426cd06e4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl @@ -0,0 +1,247 @@ + +/** + * Reduce copy pass: filter fireflies and split color between scatter and gather input. + * + * NOTE: The texture can end up being too big because of the mipmap padding. We correct for + * that during the convolution phase. + * + * Inputs: + * - Output of setup pass (halfres) and reduce downsample pass (quarter res). + * Outputs: + * - Halfres padded to avoid mipmap misalignment (so possibly not matching input size). + * - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain). + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */ +float dof_scatter_neighborhood_rejection(vec3 color) +{ + color = min(vec3(dof_buf.scatter_neighbor_max_color), color); + + float validity = 0.0; + + /* Centered in the middle of 4 quarter res texel. 
*/ + vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy); + vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size; + + vec3 max_diff = vec3(0.0); + for (int i = 0; i < 4; i++) { + vec2 sample_uv = uv + quad_offsets[i] * texel_size; + vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb; + + ref = min(vec3(dof_buf.scatter_neighbor_max_color), ref); + float diff = max_v3(max(vec3(0.0), abs(ref - color))); + + const float rejection_threshold = 0.7; + diff = saturate(diff / rejection_threshold - 1.0); + validity = max(validity, diff); + } + + return validity; +} + +/* This avoids Bokeh sprite popping in and out at the screen border and + * drawing Bokeh sprites larger than the screen. */ +float dof_scatter_screen_border_rejection(float coc, ivec2 texel) +{ + vec2 screen_size = vec2(imageSize(inout_color_lod0_img)); + vec2 uv = (vec2(texel) + 0.5) / screen_size; + vec2 screen_pos = uv * screen_size; + float min_screen_border_distance = min_v2(min(screen_pos, screen_size - screen_pos)); + /* Fullres to halfres CoC. */ + coc *= 0.5; + /* Allow 10px transition. 
*/ + const float rejection_hardeness = 1.0 / 10.0; + return saturate((min_screen_border_distance - abs(coc)) * rejection_hardeness + 1.0); +} + +float dof_scatter_luminosity_rejection(vec3 color) +{ + const float rejection_hardness = 1.0; + return saturate(max_v3(color - dof_buf.scatter_color_threshold) * rejection_hardness); +} + +float dof_scatter_coc_radius_rejection(float coc) +{ + const float rejection_hardness = 0.3; + return saturate((abs(coc) - dof_buf.scatter_coc_threshold) * rejection_hardness); +} + +float fast_luma(vec3 color) +{ + return (2.0 * color.g) + color.r + color.b; +} + +const uint cache_size = gl_WorkGroupSize.x; +shared vec4 color_cache[cache_size][cache_size]; +shared float coc_cache[cache_size][cache_size]; +shared float do_scatter[cache_size][cache_size]; + +void main() +{ + ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1); + uvec2 texel_local = gl_LocalInvocationID.xy; + /* Increase readability. */ +#define LOCAL_INDEX texel_local.y][texel_local.x +#define LOCAL_OFFSET(x_, y_) texel_local.y + (y_)][texel_local.x + (x_) + + /* Load level 0 into cache. */ + color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel); + coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r; + + /* Only scatter if luminous enough. */ + do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb); + /* Only scatter if CoC is big enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]); + /* Only scatter if CoC is not too big to avoid performance issues. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel); + /* Only scatter if neighborhood is different enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb); + /* For debugging. 
*/ + if (no_scatter_pass) { + do_scatter[LOCAL_INDEX] = 0.0; + } + + barrier(); + + /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold. */ + if (all(equal(texel_local & 1u, uvec2(0)))) { + vec4 do_scatter4; + /* Follows quad_offsets order. */ + do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)]; + do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)]; + do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)]; + do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)]; + if (any(greaterThan(do_scatter4, vec4(0.0)))) { + /* Apply energy conservation to anamorphic scattered bokeh. */ + do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv); + + /* Circle of Confusion. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, 1)]; + coc4.y = coc_cache[LOCAL_OFFSET(1, 1)]; + coc4.z = coc_cache[LOCAL_OFFSET(1, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + /* We are scattering at half resolution, so divide CoC by 2. */ + coc4 *= 0.5; + /* Sprite center position. Center sprite around the 4 texture taps. */ + vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1; + /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin + * and because we smooth the bokeh shape a bit in the pixel shader. */ + vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5; + /* Issue a sprite for each field if any CoC matches. */ + if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_fg_indirect_buf.vertex_len = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_fg_indirect_buf.instance_len, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + + vec4 coc4_fg = max(vec4(0.0), -coc4); + vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4; + /* Filter NaNs. 
*/ + fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0))); + + ScatterRect rect_fg; + rect_fg.offset = offset; + /* Negate extent to flip the sprite. Mimics optical phenomenon. */ + rect_fg.half_extent = -half_extent; + /* NOTE: Since we fliped the quad along (1,-1) line, we need to also swap the (1,1) and + * (0,0) values so that quad_offsets is in the right order in the vertex shader. */ + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_fg.color_and_coc[0].a = coc4_fg[0]; + rect_fg.color_and_coc[1].a = coc4_fg[3]; + rect_fg.color_and_coc[2].a = coc4_fg[2]; + rect_fg.color_and_coc[3].a = coc4_fg[1]; + /* Apply weights. */ + rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0]; + rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3]; + rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2]; + rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1]; + + scatter_fg_list_buf[rect_id] = rect_fg; + } + } + if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_bg_indirect_buf.vertex_len = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_bg_indirect_buf.instance_len, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + vec4 coc4_bg = max(vec4(0.0), coc4); + vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4; + /* Filter NaNs. */ + bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0))); + + ScatterRect rect_bg; + rect_bg.offset = offset; + rect_bg.half_extent = half_extent; + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_bg.color_and_coc[0].a = coc4_bg[0]; + rect_bg.color_and_coc[1].a = coc4_bg[1]; + rect_bg.color_and_coc[2].a = coc4_bg[2]; + rect_bg.color_and_coc[3].a = coc4_bg[3]; + /* Apply weights. 
*/ + rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0]; + rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1]; + rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2]; + rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3]; + + scatter_bg_list_buf[rect_id] = rect_bg; + } + } + } + } + + /* Remove scatter color from gather. */ + color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX]; + imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]); + + /* Recursive downsample. */ + for (uint i = 1u; i < DOF_MIP_COUNT; i++) { + barrier(); + uint mask = ~(~0u << i); + if (all(equal(gl_LocalInvocationID.xy & mask, uvec2(0)))) { + uint ofs = 1u << (i - 1u); + + /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, ofs)]; + coc4.y = coc_cache[LOCAL_OFFSET(ofs, ofs)]; + coc4.z = coc_cache[LOCAL_OFFSET(ofs, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + + vec4 colors[4]; + colors[0] = color_cache[LOCAL_OFFSET(0, ofs)]; + colors[1] = color_cache[LOCAL_OFFSET(ofs, ofs)]; + colors[2] = color_cache[LOCAL_OFFSET(ofs, 0)]; + colors[3] = color_cache[LOCAL_OFFSET(0, 0)]; + + vec4 weights = dof_bilateral_coc_weights(coc4); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. 
*/ + weights *= safe_rcp(sum(weights)); + + color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights); + coc_cache[LOCAL_INDEX] = dot(coc4, weights); + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy >> i); + + if (i == 1) { + imageStore(out_color_lod1_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod1_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else if (i == 2) { + imageStore(out_color_lod2_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod2_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else /* if (i == 3) */ { + imageStore(out_color_lod3_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod3_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl new file mode 100644 index 00000000000..5123eb0c238 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl @@ -0,0 +1,178 @@ + +/** + * Recombine Pass: Load separate convolution layer and composite with self + * slight defocus convolution and in-focus fields. + * + * The halfres gather methods are fast but lack precision for small CoC areas. + * To fix this we do a bruteforce gather to have a smooth transition between + * in-focus and defocus regions. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +shared uint shared_max_slight_focus_abs_coc; + +/** + * Returns The max CoC in the Slight Focus range inside this compute tile. + */ +float dof_slight_focus_coc_tile_get(vec2 frag_coord) +{ + if (all(equal(gl_LocalInvocationID, uvec3(0)))) { + shared_max_slight_focus_abs_coc = floatBitsToUint(0.0); + } + barrier(); + + float local_abs_max = 0.0; + /* Sample in a cross (X) pattern. This covers all pixels over the whole tile, as long as + * dof_max_slight_focus_radius is less than the group size. 
*/ + for (int i = 0; i < 4; i++) { + vec2 sample_uv = (frag_coord + quad_offsets[i] * 2.0 * dof_max_slight_focus_radius) / + vec2(textureSize(color_tx, 0)); + float coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + coc = clamp(coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + if (abs(coc) < dof_max_slight_focus_radius) { + local_abs_max = max(local_abs_max, abs(coc)); + } + } + /* Use atomic reduce operation. */ + atomicMax(shared_max_slight_focus_abs_coc, floatBitsToUint(local_abs_max)); + /* "Broadcast" result across all threads. */ + barrier(); + + return uintBitsToFloat(shared_max_slight_focus_abs_coc); +} + +vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float weight) +{ + /* Stabilize color by clamping with the stable half res neighborhood. */ + vec3 neighbor_min, neighbor_max; + const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1)); + for (int i = 0; i < 4; i++) { + /** + * Visit the 4 half-res texels around (and containing) the fullres texel. + * Here a diagram of a fullscreen texel (f) in the bottom left corner of a half res texel. + * We sample the stable half-resolution texture at the 4 location denoted by (h). + * ┌───────┬───────┐ + * │ h │ h │ + * │ │ │ + * │ │ f │ + * ├───────┼───────┤ + * │ h │ h │ + * │ │ │ + * │ │ │ + * └───────┴───────┘ + */ + vec2 uv_sample = ((frag_coord + corners[i]) * 0.5) / vec2(textureSize(stable_color_tx, 0)); + /* Reminder: The content of this buffer is YCoCg + CoC. */ + vec3 ycocg_sample = textureLod(stable_color_tx, uv_sample, 0.0).rgb; + neighbor_min = (i == 0) ? ycocg_sample : min(neighbor_min, ycocg_sample); + neighbor_max = (i == 0) ? ycocg_sample : max(neighbor_max, ycocg_sample); + } + /* Pad the bounds in the near in focus region to get back a bit of detail. 
*/ + float padding = 0.125 * saturate(1.0 - sqr(center_coc) / sqr(8.0)); + neighbor_max += abs(neighbor_min) * padding; + neighbor_min -= abs(neighbor_min) * padding; + /* Progressively apply the clamp to avoid harsh transition. Also mask by weight. */ + float fac = saturate(sqr(center_coc) * 4.0) * weight; + /* Clamp in YCoCg space to avoid too much color drift. */ + color = colorspace_YCoCg_from_scene_linear(color); + color = mix(color, clamp(color, neighbor_min, neighbor_max), fac); + color = colorspace_scene_linear_from_YCoCg(color); + return color; +} + +void main() +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + ivec2 tile_co = ivec2(frag_coord / float(DOF_TILES_SIZE * 2)); + + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + vec2 uv = frag_coord / vec2(textureSize(color_tx, 0)); + vec2 uv_halfres = (frag_coord * 0.5) / vec2(textureSize(color_bg_tx, 0)); + + float slight_focus_max_coc = 0.0; + if (prediction.do_slight_focus) { + slight_focus_max_coc = dof_slight_focus_coc_tile_get(frag_coord); + prediction.do_slight_focus = slight_focus_max_coc >= 0.5; + if (prediction.do_slight_focus) { + prediction.do_focus = false; + } + } + + if (prediction.do_focus) { + float center_coc = (dof_coc_from_depth(dof_buf, uv, textureLod(depth_tx, uv, 0.0).r)); + prediction.do_focus = abs(center_coc) <= 0.5; + } + + vec4 out_color = vec4(0.0); + float weight = 0.0; + + vec4 layer_color; + float layer_weight; + + if (!no_hole_fill_pass && prediction.do_hole_fill) { + layer_color = textureLod(color_hole_fill_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_hole_fill_tx, uv_halfres, 0.0).r; + out_color = layer_color * safe_rcp(layer_weight); + weight = float(layer_weight > 0.0); + } + + if (!no_background_pass && prediction.do_background) { + layer_color = textureLod(color_bg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_bg_tx, uv_halfres, 
0.0).r; + /* Always prefer background to hole_fill pass. */ + layer_color *= safe_rcp(layer_weight); + layer_weight = float(layer_weight > 0.0); + /* Composite background. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + /* Fill holes with the composited background. */ + out_color *= safe_rcp(weight); + weight = float(weight > 0.0); + } + + if (!no_slight_focus_pass && prediction.do_slight_focus) { + float center_coc; + dof_slight_focus_gather(depth_tx, + color_tx, + bokeh_lut_tx, + slight_focus_max_coc, + layer_color, + layer_weight, + center_coc); + + /* Composite slight defocus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + + out_color.rgb = dof_neighborhood_clamp(frag_coord, out_color.rgb, center_coc, layer_weight); + } + + if (!no_focus_pass && prediction.do_focus) { + layer_color = safe_color(textureLod(color_tx, uv, 0.0)); + layer_weight = 1.0; + /* Composite in focus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_foreground_pass && prediction.do_foreground) { + layer_color = textureLod(color_fg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_fg_tx, uv_halfres, 0.0).r; + /* Composite foreground. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + } + + /* Fix float precision issue in alpha compositing. 
*/ + if (out_color.a > 0.99) { + out_color.a = 1.0; + } + + if (debug_resolve_perf && prediction.do_slight_focus) { + out_color.rgb *= vec3(1.0, 0.1, 0.1); + } + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl new file mode 100644 index 00000000000..cfb7fd2568b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl @@ -0,0 +1,62 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0)) + +void main() +{ + vec4 coc4 = vec4(interp.color_and_coc1.w, + interp.color_and_coc2.w, + interp.color_and_coc3.w, + interp.color_and_coc4.w); + vec4 shapes; + if (use_bokeh_lut) { + shapes = vec4(texture(bokeh_lut_tx, interp.rect_uv1).r, + texture(bokeh_lut_tx, interp.rect_uv2).r, + texture(bokeh_lut_tx, interp.rect_uv3).r, + texture(bokeh_lut_tx, interp.rect_uv4).r); + } + else { + shapes = vec4(length(interp.rect_uv1), + length(interp.rect_uv2), + length(interp.rect_uv3), + length(interp.rect_uv4)); + } + shapes *= interp.distance_scale; + /* Becomes signed distance field in pixel units. */ + shapes -= coc4; + /* Smooth the edges a bit to fade out the undersampling artifacts. */ + shapes = saturate(1.0 - linearstep(-0.8, 0.8, shapes)); + /* Outside of bokeh shape. Try to avoid overloading ROPs. */ + if (max_v4(shapes) == 0.0) { + discard; + } + + if (!no_scatter_occlusion) { + /* Works because target is the same size as occlusion_tx. 
*/ + vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusion_tx, 0).xy); + vec2 occlusion_data = texture(occlusion_tx, uv).rg; + /* Fix tiling artifacts. (Slide 90) */ + const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR; + /* Occlude the sprite with geometry from the same field using a chebychev test (slide 85). */ + float mean = occlusion_data.x; + float variance = occlusion_data.y; + shapes *= variance * safe_rcp(variance + sqr(max(coc4 * correction_fac - mean, 0.0))); + } + + out_color = (interp.color_and_coc1 * shapes[0] + interp.color_and_coc2 * shapes[1] + + interp.color_and_coc3 * shapes[2] + interp.color_and_coc4 * shapes[3]); + /* Do not accumulate alpha. This has already been accumulated by the gather pass. */ + out_color.a = 0.0; + + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(1.0, 0.0, 0.0); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl new file mode 100644 index 00000000000..d870496a06c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl @@ -0,0 +1,45 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one triangle per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + ScatterRect rect = scatter_list_buf[gl_InstanceID]; + + interp.color_and_coc1 = rect.color_and_coc[0]; + interp.color_and_coc2 = rect.color_and_coc[1]; + interp.color_and_coc3 = rect.color_and_coc[2]; + interp.color_and_coc4 = rect.color_and_coc[3]; + + vec2 uv = vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0 - 1.0; + uv = uv * rect.half_extent; + + gl_Position = vec4(uv + rect.offset, 0.0, 1.0); + /* NDC range [-1..1]. 
*/ + gl_Position.xy = (gl_Position.xy / vec2(textureSize(occlusion_tx, 0).xy)) * 2.0 - 1.0; + + if (use_bokeh_lut) { + /* Bias scale to avoid sampling at the texture's border. */ + interp.distance_scale = (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1)); + vec2 uv_div = 1.0 / (interp.distance_scale * abs(rect.half_extent)); + interp.rect_uv1 = ((uv + quad_offsets[0]) * uv_div) * 0.5 + 0.5; + interp.rect_uv2 = ((uv + quad_offsets[1]) * uv_div) * 0.5 + 0.5; + interp.rect_uv3 = ((uv + quad_offsets[2]) * uv_div) * 0.5 + 0.5; + interp.rect_uv4 = ((uv + quad_offsets[3]) * uv_div) * 0.5 + 0.5; + /* Only for sampling. */ + interp.distance_scale *= max_v2(abs(rect.half_extent)); + } + else { + interp.distance_scale = 1.0; + interp.rect_uv1 = uv + quad_offsets[0]; + interp.rect_uv2 = uv + quad_offsets[1]; + interp.rect_uv3 = uv + quad_offsets[2]; + interp.rect_uv4 = uv + quad_offsets[3]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl new file mode 100644 index 00000000000..c017a5aa965 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer. + * + * An addition to the downsample CoC, we output the maximum slight out of focus CoC to be + * sure we don't miss a pixel. + * + * Input: + * Full-resolution color & depth buffer + * Output: + * Half-resolution Color, signed CoC (out_coc.x), and max slight focus abs CoC (out_coc.y). + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 fullres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 fullres pixels. 
*/ + vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2 + 1) * fullres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * fullres_texel_size; + /* NOTE: We use samplers without filtering. */ + colors[i] = safe_color(textureLod(color_tx, sample_uv, 0.0)); + cocs[i] = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + } + + cocs = clamp(cocs, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + + vec4 weights = dof_bilateral_coc_weights(cocs); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + vec4 out_color = weighted_sum_array(colors, weights); + imageStore(out_color_img, out_texel, out_color); + + float out_coc = dot(cocs, weights); + imageStore(out_coc_img, out_texel, vec4(out_coc)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl new file mode 100644 index 00000000000..46a25b84840 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl @@ -0,0 +1,367 @@ + +/** + * Temporal Stabilization of the Depth of field input. + * Corresponds to the TAA pass in the paper. + * We actually duplicate the TAA logic but with a few changes: + * - We run this pass at half resolution. + * - We store CoC instead of Opacity in the alpha channel of the history. + * + * This is an adaptation of the code found in eevee_film_lib.glsl + * + * Inputs: + * - Output of setup pass (halfres). + * Outputs: + * - Stabilized Color and CoC (halfres). 
+ **/ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) + +struct DofSample { + vec4 color; + float coc; +}; + +/* -------------------------------------------------------------------- */ +/** \name LDS Cache + * \{ */ + +const uint cache_size = gl_WorkGroupSize.x + 2; +shared vec4 color_cache[cache_size][cache_size]; +shared float coc_cache[cache_size][cache_size]; +/* Need 2 pixel border for depth. */ +const uint cache_depth_size = gl_WorkGroupSize.x + 4; +shared float depth_cache[cache_depth_size][cache_depth_size]; + +void dof_cache_init() +{ + /** + * Load enough values into LDS to perform the filter. + * + * ┌──────────────────────────────┐ + * │ │ < Border texels that needs to be loaded. + * │ x x x x x x x x │ ─┐ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ Thread Group Size 8x8. + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ ─┘ + * │ L L L L L │ < Border texels that needs to be loaded. + * └──────────────────────────────┘ + * └───────────┘ + * Load using 5x5 threads. + */ + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + /* 1 Pixel border. */ + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) { + ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1); + + vec4 color = texelFetch(color_tx, load_texel, 0); + color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color); + coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x; + } + /* 2 Pixels border. 
*/ + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) { + ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + /* Depth is fullres. Load every 2 pixels. */ + ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1); + + depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x; + } + } + } + barrier(); +} + +/* NOTE: Sample color space is already in YCoCg space. */ +DofSample dof_fetch_input_sample(ivec2 offset) +{ + ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy); + return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]); +} + +float dof_fetch_half_depth(ivec2 offset) +{ + ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy); + return depth_cache[coord.y][coord.x]; +} + +/** \} */ + +float dof_luma_weight(float luma) +{ + /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */ + /* To preserve more details in dark areas, we use a bigger bias. */ + const float exposure_scale = 1.0; /* TODO. */ + return 1.0 / (4.0 + luma * exposure_scale); +} + +float dof_bilateral_weight(float reference_coc, float sample_coc) +{ + /* NOTE: The difference between the cocs should be inside a abs() function, + * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). + * Effectively bleed background into foreground. + * Compared to dof_bilateral_coc_weights() this saturates as 2x the reference CoC. */ + return saturate(1.0 - (sample_coc - reference_coc) / max(1.0, abs(reference_coc))); +} + +DofSample dof_spatial_filtering() +{ + /* Plus (+) shape offsets. 
*/ + const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + DofSample center = dof_fetch_input_sample(ivec2(0)); + DofSample accum = DofSample(vec4(0.0), 0.0); + float accum_weight = 0.0; + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(plus_offsets[i]); + float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) * + dof_bilateral_weight(center.coc, samp.coc); + + accum.color += samp.color * weight; + accum.coc += samp.coc * weight; + accum_weight += weight; + } + /* Accumulate center sample last as it does not need bilateral_weights. */ + float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x); + accum.color += center.color * weight; + accum.coc += center.coc * weight; + accum_weight += weight; + + float rcp_weight = 1.0 / accum_weight; + accum.color *= rcp_weight; + accum.coc *= rcp_weight; + return accum; +} + +struct DofNeighborhoodMinMax { + DofSample min; + DofSample max; +}; + +/* Return history clipping bounding box in YCoCg color space. */ +DofNeighborhoodMinMax dof_neighbor_boundbox() +{ + /* Plus (+) shape offsets. */ + const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + /** + * Simple bounding box calculation in YCoCg as described in: + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 + */ + DofSample min_c = dof_fetch_input_sample(ivec2(0)); + DofSample max_c = min_c; + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(plus_offsets[i]); + min_c.color = min(min_c.color, samp.color); + max_c.color = max(max_c.color, samp.color); + min_c.coc = min(min_c.coc, samp.coc); + max_c.coc = max(max_c.coc, samp.coc); + } + /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts. + * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. 
*/ + DofSample min_c_3x3 = min_c; + DofSample max_c_3x3 = max_c; + const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(corners[i]); + min_c_3x3.color = min(min_c_3x3.color, samp.color); + max_c_3x3.color = max(max_c_3x3.color, samp.color); + min_c_3x3.coc = min(min_c_3x3.coc, samp.coc); + max_c_3x3.coc = max(max_c_3x3.coc, samp.coc); + } + min_c.color = (min_c.color + min_c_3x3.color) * 0.5; + max_c.color = (max_c.color + max_c_3x3.color) * 0.5; + min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5; + max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5; + + return DofNeighborhoodMinMax(min_c, max_c); +} + +/* Returns motion in pixel space to retrieve the pixel history. */ +vec2 dof_pixel_history_motion_vector(ivec2 texel_sample) +{ + /** + * Dilate velocity by using the nearest pixel in a cross pattern. + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27) + */ + const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + float min_depth = dof_fetch_half_depth(ivec2(0)); + ivec2 nearest_texel = ivec2(0); + for (int i = 0; i < 4; i++) { + float depth = dof_fetch_half_depth(corners[i]); + if (min_depth > depth) { + min_depth = depth; + nearest_texel = corners[i]; + } + } + /* Convert to full resolution buffer pixel. */ + ivec2 velocity_texel = (texel_sample + nearest_texel) * 2; + velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1); + vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth); + /* Transform to **half** pixel space. */ + return vector.xy * vec2(textureSize(color_tx, 0)); +} + +/* Load color using a special filter to avoid losing detail. + * \a texel is sample position with subpixel accuracy. */ +DofSample dof_sample_history(vec2 input_texel) +{ +#if 1 /* Bilinar. 
*/ + vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0); + vec4 color = textureLod(in_history_tx, uv, 0.0); + +#else /* Catmull Rom interpolation. 5 Bilinear Taps. */ + vec2 center_texel; + vec2 inter_texel = modf(input_texel, center_texel); + vec2 weights[4]; + film_get_catmull_rom_weights(inter_texel, weights); + + /** + * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing + * corner taps. + * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016 + * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx + */ + center_texel += 0.5; + + /* Slide 92. */ + vec2 weight_12 = weights[1] + weights[2]; + vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv; + vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv; + vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv; + + vec4 color; + vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy); + float weight_center = weight_12.x * weight_12.y; + + color = textureLod(in_history_tx, uv_12, 0.0) * weight_center; + color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x; + color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y; + color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z; + color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w; + /* Re-normalize for the removed corners. */ + color /= (weight_center + sum(weight_cross)); +#endif + /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */ + return DofSample(color.xyzz, color.w); +} + +/* Modulate the history color to avoid ghosting artifact. */ +DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src) +{ +#if 0 + /* Clip instead of clamping to avoid color accumulating in the AABB corners. 
*/ + vec3 clip_dir = src.color.rgb - history.color.rgb; + + float t = line_aabb_clipping_dist( + history.color.rgb, clip_dir, bbox.min.color.rgb, bbox.max.color.rgb); + history.color.rgb += clip_dir * saturate(t); +#else + /* More responsive. */ + history.color = clamp(history.color, bbox.min.color, bbox.max.color); +#endif + /* Clamp CoC to reduce convergence time. Otherwise the result is laggy. */ + history.coc = clamp(history.coc, bbox.min.coc, bbox.max.coc); + + return history; +} + +float dof_history_blend_factor( + float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst) +{ + float luma_min = bbox.min.color.x; + float luma_max = bbox.max.color.x; + float luma_incoming = src.color.x; + float luma_history = dst.color.x; + + /* 5% of incoming color by default. */ + float blend = 0.05; + /* Blend less history if the pixel has substantial velocity. */ + /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */ + blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0)); + /** + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43) + * Bias towards history if incoming pixel is near clamping. Reduces flicker. + */ + float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history)); + /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */ + distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min); + /* Linearly blend when history gets below to 25% of the bbox size. */ + blend *= saturate(distance_to_luma_clip * 4.0 + 0.1); + /* Progressively discard history until history CoC is twice as big as the filtered CoC. + * Note we use absolute diff here because we are not comparing neighbors and thus do not risk to + * dilate thin features like hair (slide 19). */ + float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc))); + blend = mix(blend, 1.0, coc_diff_ratio); + /* Discard out of view history. 
*/ + if (any(lessThan(texel, vec2(0))) || + any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) { + blend = 1.0; + } + /* Discard history if invalid. */ + if (use_history == false) { + blend = 1.0; + } + return blend; +} + +void main() +{ + dof_cache_init(); + + ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy); + + /** + * Naming convention is taken from the film implementation. + * SRC is incoming new data. + * DST is history data. + */ + DofSample src = dof_spatial_filtering(); + + /* Reproject by finding where this pixel was in the previous frame. */ + vec2 motion = dof_pixel_history_motion_vector(src_texel); + vec2 history_texel = vec2(src_texel) + motion; + + float velocity = length(motion); + + DofSample dst = dof_sample_history(history_texel); + + /* Get local color bounding box of source neighborhood. */ + DofNeighborhoodMinMax bbox = dof_neighbor_boundbox(); + + float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst); + + dst = dof_amend_history(bbox, dst, src); + + /* Luma weighted blend to reduce flickering. */ + float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend); + float weight_src = dof_luma_weight(src.color.x) * (blend); + + DofSample result; + /* Weighted blend. */ + result.color = vec4(dst.color.rgb, dst.coc) * weight_dst + + vec4(src.color.rgb, src.coc) * weight_src; + result.color /= weight_src + weight_dst; + + /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */ + imageStore(out_history_img, src_texel, result.color); + + /* Un-swizzle. */ + result.coc = result.color.a; + /* Clamp opacity since we don't store it in history. 
*/ + result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a); + + result.color = colorspace_scene_linear_from_YCoCg(result.color); + + imageStore(out_color_img, src_texel, result.color); + imageStore(out_coc_img, src_texel, vec4(result.coc)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl new file mode 100644 index 00000000000..dba8b5fd79d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl @@ -0,0 +1,97 @@ + +/** + * Tile dilate pass: Takes the 8x8 Tiles buffer and dilates the tiles with large CoC to + * their neighborhood. This pass is repeated multiple times until the maximum CoC can be covered. + * + * Input & Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* Error introduced by the random offset of the gathering kernel's center. */ +const float bluring_radius_error = 1.0 + 1.0 / (float(DOF_GATHER_RING_COUNT) + 0.5); +const float tile_to_fullres_factor = float(DOF_TILES_SIZE * 2); + +void main() +{ + ivec2 center_tile_pos = ivec2(gl_GlobalInvocationID.xy); + + CocTile ring_buckets[DOF_DILATE_RING_COUNT]; + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + ring_buckets[ring] = dof_coc_tile_init(); + + int ring_distance = ring + 1; + for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) { + ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id); + + offset *= ring_width_multiplier; + + for (int i = 0; i < 2; i++) { + ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? 
offset : -offset); + + CocTile adj_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, adj_tile_pos); + + if (DILATE_MODE_MIN_MAX) { + /* Actually gather the "absolute" biggest coc but keeping the sign. */ + ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc); + ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc); + } + else { /* DILATE_MODE_MIN_ABS */ + ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc); + ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc); + + /* Should be tight as possible to reduce gather overhead (see slide 61). */ + float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) * + tile_to_fullres_factor; + + ring_buckets[ring].fg_max_intersectable_coc = max( + ring_buckets[ring].fg_max_intersectable_coc, + adj_tile.fg_max_intersectable_coc + closest_neighbor_distance); + ring_buckets[ring].bg_min_intersectable_coc = min( + ring_buckets[ring].bg_min_intersectable_coc, + adj_tile.bg_min_intersectable_coc + closest_neighbor_distance); + } + } + } + } + + /* Load center tile. */ + CocTile out_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, center_tile_pos); + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + float ring_distance = float(ring + 1); + + ring_distance = (ring_distance * ring_width_multiplier - 1) * tile_to_fullres_factor; + + if (DILATE_MODE_MIN_MAX) { + /* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. 
*/ + if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc); + } + + if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc); + } + } + else { /* DILATE_MODE_MIN_ABS */ + /* Find minimum absolute CoC radii that will be intersected for the previously + * computed maximum CoC values. */ + if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc); + out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc, + ring_buckets[ring].fg_max_intersectable_coc); + } + + if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc); + out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc, + ring_buckets[ring].bg_min_intersectable_coc); + } + } + } + + ivec2 texel_out = ivec2(gl_GlobalInvocationID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, texel_out, out_tile); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl new file mode 100644 index 00000000000..88737ade386 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl @@ -0,0 +1,78 @@ + +/** + * Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles. + * + * Output min and max values for each tile and for both foreground & background. + * Also outputs min intersectable CoC for the background, which is the minimum CoC + * that comes from the background pixels. + * + * Input: + * - Half-resolution Circle of confusion. Out of setup pass. 
+ * Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/** + * In order to use atomic operations, we have to use uints. But this means having to deal with the + * negative number ourselves. Luckily, each group has a nicely defined range of values we can + * remap to positive float. + */ +shared uint fg_min_coc; +shared uint fg_max_coc; +shared uint fg_max_intersectable_coc; +shared uint bg_min_coc; +shared uint bg_max_coc; +shared uint bg_min_intersectable_coc; + +const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc); + +void main() +{ + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + /* NOTE: Min/Max flipped because of inverted fg_coc sign. */ + fg_min_coc = floatBitsToUint(0.0); + fg_max_coc = dof_tile_large_coc_uint; + fg_max_intersectable_coc = dof_tile_large_coc_uint; + bg_min_coc = dof_tile_large_coc_uint; + bg_max_coc = floatBitsToUint(0.0); + bg_min_intersectable_coc = dof_tile_large_coc_uint; + } + barrier(); + + ivec2 sample_texel = min(ivec2(gl_GlobalInvocationID.xy), textureSize(coc_tx, 0).xy - 1); + vec2 sample_data = texelFetch(coc_tx, sample_texel, 0).rg; + + float sample_coc = sample_data.x; + uint fg_coc = floatBitsToUint(max(-sample_coc, 0.0)); + /* NOTE: atomicMin/Max flipped because of inverted fg_coc sign. */ + atomicMax(fg_min_coc, fg_coc); + atomicMin(fg_max_coc, fg_coc); + atomicMin(fg_max_intersectable_coc, (sample_coc < 0.0) ? fg_coc : dof_tile_large_coc_uint); + + uint bg_coc = floatBitsToUint(max(sample_coc, 0.0)); + atomicMin(bg_min_coc, bg_coc); + atomicMax(bg_max_coc, bg_coc); + atomicMin(bg_min_intersectable_coc, (sample_coc > 0.0) ? 
bg_coc : dof_tile_large_coc_uint); + + barrier(); + + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + if (fg_max_intersectable_coc == dof_tile_large_coc_uint) { + fg_max_intersectable_coc = floatBitsToUint(0.0); + } + + CocTile tile; + /* Foreground sign is flipped since we compare unsigned representation. */ + tile.fg_min_coc = -uintBitsToFloat(fg_min_coc); + tile.fg_max_coc = -uintBitsToFloat(fg_max_coc); + tile.fg_max_intersectable_coc = -uintBitsToFloat(fg_max_intersectable_coc); + tile.bg_min_coc = uintBitsToFloat(bg_min_coc); + tile.bg_max_coc = uintBitsToFloat(bg_max_coc); + tile.bg_min_intersectable_coc = uintBitsToFloat(bg_min_intersectable_coc); + + ivec2 tile_co = ivec2(gl_WorkGroupID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, tile_co, tile); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl new file mode 100644 index 00000000000..ce1f19edf53 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl @@ -0,0 +1,13 @@ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_film_lib.glsl) + +void main() +{ + ivec2 texel_film = ivec2(gl_GlobalInvocationID.xy); + /* Not used. 
*/ + vec4 out_color; + float out_depth; + + film_process_data(texel_film, out_color, out_depth); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl new file mode 100644 index 00000000000..120edd9c35e --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl @@ -0,0 +1,77 @@ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define CRYPTOMATTE_LEVELS_MAX 16 + +void cryptomatte_load_samples(ivec2 texel, int layer, out vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + int pass_len = divide_ceil(cryptomatte_samples_per_layer, 2); + int layer_id = layer * pass_len; + + /* Read all samples from the cryptomatte layer. */ + for (int p = 0; p < pass_len; p++) { + vec4 pass_sample = imageLoad(cryptomatte_img, ivec3(texel, p + layer_id)); + samples[p * 2] = pass_sample.xy; + samples[p * 2 + 1] = pass_sample.zw; + } + for (int i = pass_len * 2; i < CRYPTOMATTE_LEVELS_MAX; i++) { + samples[i] = vec2(0.0); + } +} + +void cryptomatte_sort_samples(inout vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + /* Sort samples. Lame implementation, can be replaced with a more efficient algorithm. 
*/ + for (int i = 0; i < cryptomatte_samples_per_layer - 1 && samples[i].y != 0.0; i++) { + int highest_index = i; + float highest_weight = samples[i].y; + for (int j = i + 1; j < cryptomatte_samples_per_layer && samples[j].y != 0.0; j++) { + if (samples[j].y > highest_weight) { + highest_index = j; + highest_weight = samples[j].y; + } + }; + + if (highest_index != i) { + vec2 tmp = samples[i]; + samples[i] = samples[highest_index]; + samples[highest_index] = tmp; + } + } +} +void cryptomatte_normalize_weight(float total_weight, inout vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + for (int i = 0; i < CRYPTOMATTE_LEVELS_MAX; i++) { + samples[i].y /= total_weight; + } +} + +void cryptomatte_store_samples(ivec2 texel, int layer, in vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + int pass_len = divide_ceil(cryptomatte_samples_per_layer, 2); + int layer_id = layer * pass_len; + + /* Store samples back to the cryptomatte layer. */ + for (int p = 0; p < pass_len; p++) { + vec4 pass_sample; + pass_sample.xy = samples[p * 2]; + pass_sample.zw = samples[p * 2 + 1]; + imageStore(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample); + } +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + for (int layer = 0; layer < cryptomatte_layer_len; layer++) { + vec2 samples[CRYPTOMATTE_LEVELS_MAX]; + cryptomatte_load_samples(texel, layer, samples); + cryptomatte_sort_samples(samples); + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. 
*/ + float weight = imageLoad( + weight_img, + ivec3(texel % imageSize(weight_img).xy, FILM_WEIGHT_LAYER_ACCUMULATION)) + .x; + cryptomatte_normalize_weight(weight, samples); + cryptomatte_store_samples(texel, layer, samples); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl new file mode 100644 index 00000000000..e2aaf9128a5 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl @@ -0,0 +1,35 @@ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_film_lib.glsl) + +void main() +{ + ivec2 texel_film = ivec2(gl_FragCoord.xy) - film_buf.offset; + float out_depth; + + if (film_buf.display_only) { + out_depth = imageLoad(depth_img, texel_film).r; + + if (film_buf.display_id == -1) { + out_color = texelFetch(in_combined_tx, texel_film, 0); + } + else if (film_buf.display_storage_type == PASS_STORAGE_VALUE) { + out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, film_buf.display_id)).rrr; + out_color.a = 1.0; + } + else if (film_buf.display_storage_type == PASS_STORAGE_COLOR) { + out_color = imageLoad(color_accum_img, ivec3(texel_film, film_buf.display_id)); + } + else /* PASS_STORAGE_CRYPTOMATTE */ { + out_color = cryptomatte_false_color( + imageLoad(cryptomatte_img, ivec3(texel_film, film_buf.display_id)).r); + } + } + else { + film_process_data(texel_film, out_color, out_depth); + } + + gl_FragDepth = get_depth_from_view_z(-out_depth); + + gl_FragDepth = film_display_depth_ammend(texel_film, gl_FragDepth); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl new file mode 100644 index 00000000000..21b9a83abb9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl @@ -0,0 +1,755 @@ + +/** + * Film accumulation utils functions. 
+ **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_cryptomatte_lib.glsl) + +/* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */ +float film_depth_convert_to_scene(float depth) +{ + if (false /* Panoramic */) { + /* TODO */ + return 1.0; + } + return abs(get_view_z_from_depth(depth)); +} + +/* Load a texture sample in a specific format. Combined pass needs to use this. */ +vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel) +{ + vec4 color = texelFetch(combined_tx, texel, 0); + /* Convert transmittance to opacity. */ + color.a = saturate(1.0 - color.a); + /* Transform to YCoCg for accumulation. */ + color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb); + return color; +} + +/* Returns a weight based on Luma to reduce the flickering introduced by high energy pixels. */ +float film_luma_weight(float luma) +{ + /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */ + /* To preserve more details in dark areas, we use a bigger bias. */ + return 1.0 / (4.0 + luma * film_buf.exposure_scale); +} + +/* -------------------------------------------------------------------- */ +/** \name Filter + * \{ */ + +FilmSample film_sample_get(int sample_n, ivec2 texel_film) +{ +#ifdef PANORAMIC + /* TODO(fclem): Panoramic projection will be more complex. The samples will have to be retrieve + * at runtime, maybe by scanning a whole region. Offset and weight will have to be computed by + * reprojecting the incoming pixel data into film pixel space. 
*/ +#else + +# ifdef SCALED_RENDERING + texel_film /= film_buf.scaling_factor; +# endif + + FilmSample film_sample = film_buf.samples[sample_n]; + film_sample.texel += texel_film + film_buf.offset; + /* Use extend on borders. */ + film_sample.texel = clamp(film_sample.texel, ivec2(0, 0), film_buf.render_extent - 1); + + /* TODO(fclem): Panoramic projection will need to compute the sample weight in the shader + * instead of precomputing it on CPU. */ +# ifdef SCALED_RENDERING + /* We need to compute the real distance and weight since a sample + * can be used by many final pixel. */ + vec2 offset = film_buf.subpixel_offset - vec2(texel_film % film_buf.scaling_factor); + film_sample.weight = film_filter_weight(film_buf.filter_size, len_squared(offset)); +# endif + +#endif /* PANORAMIC */ + + /* Always return a weight above 0 to avoid blind spots between samples. */ + film_sample.weight = max(film_sample.weight, 1e-6); + + return film_sample; +} + +/* Returns the combined weights of all samples affecting this film pixel. */ +float film_weight_accumulation(ivec2 texel_film) +{ +#if 0 /* TODO(fclem): Reference implementation, also needed for panoramic cameras. 
*/ + float weight = 0.0; + for (int i = 0; i < film_buf.samples_len; i++) { + weight += film_sample_get(i, texel_film).weight; + } + return weight; +#endif + return film_buf.samples_weight_total; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout vec4 accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, samp.texel, 0) * samp.weight; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout float accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, samp.texel, 0).x * samp.weight; +} + +void film_sample_accum( + FilmSample samp, int pass_id, uint layer, sampler2DArray tex, inout vec4 accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, ivec3(samp.texel, layer), 0) * samp.weight; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout vec4 accum) +{ + film_sample_accum(samp, pass_id, pass_id, tex, accum); +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout float accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, ivec3(samp.texel, pass_id), 0).x * samp.weight; +} + +void film_sample_accum_mist(FilmSample samp, inout float accum) +{ + if (film_buf.mist_id == -1) { + return; + } + float depth = texelFetch(depth_tx, samp.texel, 0).x; + vec2 uv = (vec2(samp.texel) + 0.5) / textureSize(depth_tx, 0).xy; + vec3 vP = get_view_space_from_depth(uv, depth); + bool is_persp = ProjectionMatrix[3][3] == 0.0; + float mist = (is_persp) ? length(vP) : abs(vP.z); + /* Remap to 0..1 range. */ + mist = saturate(mist * film_buf.mist_scale + film_buf.mist_bias); + /* Falloff. 
*/ + mist = pow(mist, film_buf.mist_exponent); + accum += mist * samp.weight; +} + +void film_sample_accum_combined(FilmSample samp, inout vec4 accum, inout float weight_accum) +{ + if (film_buf.combined_id == -1) { + return; + } + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, samp.texel); + + /* Weight by luma to remove fireflies. */ + float weight = film_luma_weight(color.x) * samp.weight; + + accum += color * weight; + weight_accum += weight; +} + +void film_sample_cryptomatte_accum(FilmSample samp, + int layer, + sampler2D tex, + inout vec2 crypto_samples[4]) +{ + float hash = texelFetch(tex, samp.texel, 0)[layer]; + /* Find existing entry. */ + for (int i = 0; i < 4; i++) { + if (crypto_samples[i].x == hash) { + crypto_samples[i].y += samp.weight; + return; + } + } + /* Overwrite entry with less weight. */ + for (int i = 0; i < 4; i++) { + if (crypto_samples[i].y < samp.weight) { + crypto_samples[i] = vec2(hash, samp.weight); + return; + } + } +} + +void film_cryptomatte_layer_accum_and_store( + FilmSample dst, ivec2 texel_film, int pass_id, int layer_component, inout vec4 out_color) +{ + if (pass_id == -1) { + return; + } + /* x = hash, y = accumed weight. Only keep track of 4 highest weighted samples. */ + vec2 crypto_samples[4] = vec2[4](vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0)); + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_cryptomatte_accum(src, layer_component, cryptomatte_tx, crypto_samples); + } + for (int i = 0; i < 4; i++) { + cryptomatte_store_film_sample(dst, pass_id, crypto_samples[i], out_color); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Load/Store Data + * \{ */ + +/* Returns the distance used to store nearest interpolation data. */ +float film_distance_load(ivec2 texel) +{ + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. 
*/ + texel = texel % imageSize(in_weight_img).xy; + + if (!film_buf.use_history || film_buf.use_reprojection) { + return 1.0e16; + } + return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x; +} + +float film_weight_load(ivec2 texel) +{ + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */ + texel = texel % imageSize(in_weight_img).xy; + + if (!film_buf.use_history || film_buf.use_reprojection) { + return 0.0; + } + return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x; +} + +/* Returns motion in pixel space to retrieve the pixel history. */ +vec2 film_pixel_history_motion_vector(ivec2 texel_sample) +{ + /** + * Dilate velocity by using the nearest pixel in a cross pattern. + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27) + */ + const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + float min_depth = texelFetch(depth_tx, texel_sample, 0).x; + ivec2 nearest_texel = texel_sample; + for (int i = 0; i < 4; i++) { + ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1); + float depth = texelFetch(depth_tx, texel, 0).x; + if (min_depth > depth) { + min_depth = depth; + nearest_texel = texel; + } + } + + vec4 vector = velocity_resolve(vector_tx, nearest_texel, min_depth); + + /* Transform to pixel space. */ + vector.xy *= vec2(film_buf.extent); + + return vector.xy; +} + +/* \a t is inter-pixel position. 0 means perfectly on a pixel center. + * Returns weights in both dimensions. + * Multiply each dimension weights to get final pixel weights. */ +void film_get_catmull_rom_weights(vec2 t, out vec2 weights[4]) +{ + vec2 t2 = t * t; + vec2 t3 = t2 * t; + float fc = 0.5; /* Catmull-Rom. 
*/ + + vec2 fct = t * fc; + vec2 fct2 = t2 * fc; + vec2 fct3 = t3 * fc; + weights[0] = (fct2 * 2.0 - fct3) - fct; + weights[1] = (t3 * 2.0 - fct3) + (-t2 * 3.0 + fct2) + 1.0; + weights[2] = (-t3 * 2.0 + fct3) + (t2 * 3.0 - (2.0 * fct2)) + fct; + weights[3] = fct3 - fct2; +} + +/* Load color using a special filter to avoid losing detail. + * \a texel is sample position with subpixel accuracy. */ +vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel) +{ + vec2 center_texel; + vec2 inter_texel = modf(input_texel, center_texel); + vec2 weights[4]; + film_get_catmull_rom_weights(inter_texel, weights); + +#if 0 /* Reference. 16 Taps. */ + vec4 color = vec4(0.0); + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + ivec2 texel = ivec2(center_texel) + ivec2(x, y) - 1; + texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1); + color += texelFetch(color_tx, texel, 0) * weights[x].x * weights[y].y; + } + } + return color; + +#elif 1 /* Optimize version. 5 Bilinear Taps. */ + /** + * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing + * corner taps. + * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016 + * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx + */ + center_texel += 0.5; + + /* Slide 92. 
*/ + vec2 weight_12 = weights[1] + weights[2]; + vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv; + vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv; + vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv; + + vec4 color; + vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy); + float weight_center = weight_12.x * weight_12.y; + + color = textureLod(color_tx, uv_12, 0.0) * weight_center; + color += textureLod(color_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x; + color += textureLod(color_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y; + color += textureLod(color_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z; + color += textureLod(color_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w; + /* Re-normalize for the removed corners. */ + return color / (weight_center + sum(weight_cross)); + +#else /* Nearest interpolation for debugging. 1 Tap. */ + ivec2 texel = ivec2(center_texel) + ivec2(greaterThan(inter_texel, vec2(0.5))); + texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1); + return texelFetch(color_tx, texel, 0); +#endif +} + +/* Return history clipping bounding box in YCoCg color space. */ +void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c) +{ + /* Plus (+) shape offsets. */ + const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */ + ivec2(-1, 0), + ivec2(0, -1), + ivec2(1, 0), + ivec2(0, 1)); +#if 0 + /** + * Compute Variance of neighborhood as described in: + * "An Excursion in Temporal Supersampling" by Marco Salvi at GDC 2016. + * and: + * "A Survey of Temporal Antialiasing Techniques" by Yang et al. + */ + + /* First 2 moments. */ + vec4 mu1 = vec4(0), mu2 = vec4(0); + for (int i = 0; i < 5; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]); + mu1 += color; + mu2 += sqr(color); + } + mu1 *= (1.0 / 5.0); + mu2 *= (1.0 / 5.0); + + /* Extent scaling. Range [0.75..1.25]. 
+ * Balance between more flickering (0.75) or more ghosting (1.25). */ + const float gamma = 1.25; + /* Standard deviation. */ + vec4 sigma = sqrt(abs(mu2 - sqr(mu1))); + /* eq. 6 in "A Survey of Temporal Antialiasing Techniques". */ + min_c = mu1 - gamma * sigma; + max_c = mu1 + gamma * sigma; +#else + /** + * Simple bounding box calculation in YCoCg as described in: + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 + */ + min_c = vec4(1e16); + max_c = vec4(-1e16); + for (int i = 0; i < 5; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]); + min_c = min(min_c, color); + max_c = max(max_c, color); + } + /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts. + * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */ + vec4 min_c_3x3 = min_c; + vec4 max_c_3x3 = max_c; + const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + for (int i = 0; i < 4; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]); + min_c_3x3 = min(min_c_3x3, color); + max_c_3x3 = max(max_c_3x3, color); + } + min_c = (min_c + min_c_3x3) * 0.5; + max_c = (max_c + max_c_3x3) * 0.5; +#endif +} + +/* 1D equivalent of line_aabb_clipping_dist(). */ +float film_aabb_clipping_dist_alpha(float origin, float direction, float aabb_min, float aabb_max) +{ + if (abs(direction) < 1e-5) { + return 0.0; + } + float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max; + return (nearest_plane - origin) / direction; +} + +/* Modulate the history color to avoid ghosting artifact. */ +vec4 film_amend_combined_history( + vec4 min_color, vec4 max_color, vec4 color_history, vec4 src_color, ivec2 src_texel) +{ + /* Clip instead of clamping to avoid color accumulating in the AABB corners. 
*/ + vec4 clip_dir = src_color - color_history; + + float t = line_aabb_clipping_dist(color_history.rgb, clip_dir.rgb, min_color.rgb, max_color.rgb); + color_history.rgb += clip_dir.rgb * saturate(t); + + /* Clip alpha on its own to avoid interference with other channels. */ + float t_a = film_aabb_clipping_dist_alpha(color_history.a, clip_dir.a, min_color.a, max_color.a); + color_history.a += clip_dir.a * saturate(t_a); + + return color_history; +} + +float film_history_blend_factor(float velocity, + vec2 texel, + float luma_min, + float luma_max, + float luma_incoming, + float luma_history) +{ + /* 5% of incoming color by default. */ + float blend = 0.05; + /* Blend less history if the pixel has substantial velocity. */ + blend = mix(blend, 0.20, saturate(velocity * 0.02)); + /** + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43) + * Bias towards history if incoming pixel is near clamping. Reduces flicker. + */ + float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history)); + /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */ + distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min); + /* Linearly blend when history gets below to 25% of the bbox size. */ + blend *= saturate(distance_to_luma_clip * 4.0 + 0.1); + /* Discard out of view history. */ + if (any(lessThan(texel, vec2(0))) || any(greaterThanEqual(texel, film_buf.extent))) { + blend = 1.0; + } + /* Discard history if invalid. */ + if (film_buf.use_history == false) { + blend = 1.0; + } + return blend; +} + +/* Returns resolved final color. */ +void film_store_combined( + FilmSample dst, ivec2 src_texel, vec4 color, float color_weight, inout vec4 display) +{ + if (film_buf.combined_id == -1) { + return; + } + + vec4 color_src, color_dst; + float weight_src, weight_dst; + + /* Undo the weighting to get final spatialy-filtered color. 
*/ + color_src = color / color_weight; + + if (film_buf.use_reprojection) { + /* Interactive accumulation. Do reprojection and Temporal Anti-Aliasing. */ + + /* Reproject by finding where this pixel was in the previous frame. */ + vec2 motion = film_pixel_history_motion_vector(src_texel); + vec2 history_texel = vec2(dst.texel) + motion; + + float velocity = length(motion); + + /* Load weight if it is not uniform across the whole buffer (i.e: upsampling, panoramic). */ + // dst.weight = film_weight_load(texel_combined); + + color_dst = film_sample_catmull_rom(in_combined_tx, history_texel); + color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb); + + /* Get local color bounding box of source neighborhood. */ + vec4 min_color, max_color; + film_combined_neighbor_boundbox(src_texel, min_color, max_color); + + float blend = film_history_blend_factor( + velocity, history_texel, min_color.x, max_color.x, color_src.x, color_dst.x); + + color_dst = film_amend_combined_history(min_color, max_color, color_dst, color_src, src_texel); + + /* Luma weighted blend to avoid flickering. */ + weight_dst = film_luma_weight(color_dst.x) * (1.0 - blend); + weight_src = film_luma_weight(color_src.x) * (blend); + } + else { + /* Everything is static. Use render accumulation. */ + color_dst = texelFetch(in_combined_tx, dst.texel, 0); + color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb); + + /* Luma weighted blend to avoid flickering. */ + weight_dst = film_luma_weight(color_dst.x) * dst.weight; + weight_src = color_weight; + } + /* Weighted blend. */ + color = color_dst * weight_dst + color_src * weight_src; + color /= weight_src + weight_dst; + + color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb); + + /* Fix alpha not accumulating to 1 because of float imprecision. */ + if (color.a > 0.995) { + color.a = 1.0; + } + + /* Filter NaNs. 
*/ + if (any(isnan(color))) { + color = vec4(0.0, 0.0, 0.0, 1.0); + } + + if (film_buf.display_id == -1) { + display = color; + } + imageStore(out_combined_img, dst.texel, color); +} + +void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id)); + + color = (data_film * dst.weight + color) * dst.weight_sum_inv; + + /* Filter NaNs. */ + if (any(isnan(color))) { + color = vec4(0.0, 0.0, 0.0, 1.0); + } + + if (film_buf.display_id == pass_id) { + display = color; + } + imageStore(color_accum_img, ivec3(dst.texel, pass_id), color); +} + +void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x; + + value = (data_film * dst.weight + value) * dst.weight_sum_inv; + + /* Filter NaNs. */ + if (isnan(value)) { + value = 0.0; + } + + if (film_buf.display_id == pass_id) { + display = vec4(value, value, value, 1.0); + } + imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value)); +} + +/* Nearest sample variant. Always stores the data. 
*/ +void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + if (film_buf.display_id == pass_id) { + display = data_sample; + } + imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample); +} + +void film_store_depth(ivec2 texel_film, float value, out float out_depth) +{ + if (film_buf.depth_id == -1) { + return; + } + + out_depth = film_depth_convert_to_scene(value); + + imageStore(depth_img, texel_film, vec4(out_depth)); +} + +void film_store_distance(ivec2 texel, float value) +{ + imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value)); +} + +void film_store_weight(ivec2 texel, float value) +{ + imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value)); +} + +float film_display_depth_ammend(ivec2 texel, float depth) +{ + /* This effectively offsets the depth of the whole 2x2 region to the lowest value of the region + * twice. One for X and one for Y direction. */ + /* TODO(fclem): This could be improved as it gives flickering result at depth discontinuity. + * But this is the quickest stable result I could come with for now. */ +#ifdef GPU_FRAGMENT_SHADER + depth += fwidth(depth); +#endif + /* Small offset to avoid depth test lessEqual failing because of all the conversions loss. */ + depth += 2.4e-7 * 4.0; + return saturate(depth); +} + +/** \} */ + +/** NOTE: out_depth is scene linear depth from the camera origin. 
*/ +void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth) +{ + out_color = vec4(0.0); + out_depth = 0.0; + + float weight_accum = film_weight_accumulation(texel_film); + float film_weight = film_weight_load(texel_film); + float weight_sum = film_weight + weight_accum; + film_store_weight(texel_film, weight_sum); + + FilmSample dst; + dst.texel = texel_film; + dst.weight = film_weight; + dst.weight_sum_inv = 1.0 / weight_sum; + + /* NOTE: We split the accumulations into separate loops to avoid using too much registers and + * maximize occupancy. */ + + if (film_buf.combined_id != -1) { + /* NOTE: Do weight accumulation again since we use custom weights. */ + float weight_accum = 0.0; + vec4 combined_accum = vec4(0.0); + + FilmSample src; + for (int i = film_buf.samples_len - 1; i >= 0; i--) { + src = film_sample_get(i, texel_film); + film_sample_accum_combined(src, combined_accum, weight_accum); + } + /* NOTE: src.texel is center texel in incoming data buffer. */ + film_store_combined(dst, src.texel, combined_accum, weight_accum, out_color); + } + + if (film_buf.has_data) { + float film_distance = film_distance_load(texel_film); + + /* Get sample closest to target texel. It is always sample 0. */ + FilmSample film_sample = film_sample_get(0, texel_film); + + if (film_buf.use_reprojection || film_sample.weight < film_distance) { + vec4 normal = texelFetch(normal_tx, film_sample.texel, 0); + float depth = texelFetch(depth_tx, film_sample.texel, 0).x; + vec4 vector = velocity_resolve(vector_tx, film_sample.texel, depth); + /* Transform to pixel space. 
*/ + vector *= vec4(film_buf.render_extent, -film_buf.render_extent); + + film_store_depth(texel_film, depth, out_depth); + film_store_data(texel_film, film_buf.normal_id, normal, out_color); + film_store_data(texel_film, film_buf.vector_id, vector, out_color); + film_store_distance(texel_film, film_sample.weight); + } + else { + out_depth = imageLoad(depth_img, texel_film).r; + } + } + + if (film_buf.any_render_pass_1) { + vec4 diffuse_light_accum = vec4(0.0); + vec4 specular_light_accum = vec4(0.0); + vec4 volume_light_accum = vec4(0.0); + vec4 emission_accum = vec4(0.0); + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, + film_buf.diffuse_light_id, + RENDER_PASS_LAYER_DIFFUSE_LIGHT, + light_tx, + diffuse_light_accum); + film_sample_accum(src, + film_buf.specular_light_id, + RENDER_PASS_LAYER_SPECULAR_LIGHT, + light_tx, + specular_light_accum); + film_sample_accum(src, film_buf.volume_light_id, volume_light_tx, volume_light_accum); + film_sample_accum(src, film_buf.emission_id, emission_tx, emission_accum); + } + film_store_color(dst, film_buf.diffuse_light_id, diffuse_light_accum, out_color); + film_store_color(dst, film_buf.specular_light_id, specular_light_accum, out_color); + film_store_color(dst, film_buf.volume_light_id, volume_light_accum, out_color); + film_store_color(dst, film_buf.emission_id, emission_accum, out_color); + } + + if (film_buf.any_render_pass_2) { + vec4 diffuse_color_accum = vec4(0.0); + vec4 specular_color_accum = vec4(0.0); + vec4 environment_accum = vec4(0.0); + float mist_accum = 0.0; + float shadow_accum = 0.0; + float ao_accum = 0.0; + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, film_buf.diffuse_color_id, diffuse_color_tx, diffuse_color_accum); + film_sample_accum(src, film_buf.specular_color_id, specular_color_tx, specular_color_accum); + film_sample_accum(src, 
film_buf.environment_id, environment_tx, environment_accum); + film_sample_accum(src, film_buf.shadow_id, shadow_tx, shadow_accum); + film_sample_accum(src, film_buf.ambient_occlusion_id, ambient_occlusion_tx, ao_accum); + film_sample_accum_mist(src, mist_accum); + } + film_store_color(dst, film_buf.diffuse_color_id, diffuse_color_accum, out_color); + film_store_color(dst, film_buf.specular_color_id, specular_color_accum, out_color); + film_store_color(dst, film_buf.environment_id, environment_accum, out_color); + film_store_value(dst, film_buf.shadow_id, shadow_accum, out_color); + film_store_value(dst, film_buf.ambient_occlusion_id, ao_accum, out_color); + film_store_value(dst, film_buf.mist_id, mist_accum, out_color); + } + + for (int aov = 0; aov < film_buf.aov_color_len; aov++) { + vec4 aov_accum = vec4(0.0); + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, aov, aov_color_tx, aov_accum); + } + film_store_color(dst, film_buf.aov_color_id + aov, aov_accum, out_color); + } + + for (int aov = 0; aov < film_buf.aov_value_len; aov++) { + float aov_accum = 0.0; + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, aov, aov_value_tx, aov_accum); + } + film_store_value(dst, film_buf.aov_value_id + aov, aov_accum, out_color); + } + + if (film_buf.cryptomatte_samples_len != 0) { + /* Cryptomatte passes cannot be cleared by a weighted store like other passes. 
*/ + if (!film_buf.use_history || film_buf.use_reprojection) { + cryptomatte_clear_samples(dst); + } + + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_object_id, 0, out_color); + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_asset_id, 1, out_color); + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_material_id, 2, out_color); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl new file mode 100644 index 00000000000..e93d0f472fa --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl @@ -0,0 +1,24 @@ + +/** + * Debug hiz down sampling pass. + * Output red if above any max pixels, blue otherwise. + */ + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth0 = texelFetch(hiz_tx, texel, 0).r; + + vec4 color = vec4(0.1, 0.1, 1.0, 1.0); + for (int i = 1; i < HIZ_MIP_COUNT; i++) { + ivec2 lvl_texel = texel / ivec2(uvec2(1) << uint(i)); + lvl_texel = min(lvl_texel, textureSize(hiz_tx, i) - 1); + if (texelFetch(hiz_tx, lvl_texel, i).r < depth0) { + color = vec4(1.0, 0.1, 0.1, 1.0); + break; + } + } + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl new file mode 100644 index 00000000000..597bc73e2ad --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -0,0 +1,121 @@ + +/** + * Shader that down-sample depth buffer, creating a Hierarchical-Z buffer. + * Saves max value of each 2x2 texel in the mipmap above the one we are + * rendering to. 
Adapted from + * http://rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/ + * + * Major simplification has been made since we pad the buffer to always be + * bigger than input to avoid mipmapping misalignement. + * + * Start by copying the base level by quad loading the depth. + * Then each thread compute it's local depth for level 1. + * After that we use shared variables to do inter thread comunication and + * downsample to max level. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float local_depths[gl_WorkGroupSize.y][gl_WorkGroupSize.x]; + +/* Load values from the previous lod level. */ +vec4 load_local_depths(ivec2 pixel) +{ + pixel *= 2; + return vec4(local_depths[pixel.y + 1][pixel.x + 0], + local_depths[pixel.y + 1][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 0]); +} + +void store_local_depth(ivec2 pixel, float depth) +{ + local_depths[pixel.y][pixel.x] = depth; +} + +void main() +{ + ivec2 local_px = ivec2(gl_LocalInvocationID.xy); + /* Bottom left corner of the kernel. */ + ivec2 kernel_origin = ivec2(gl_WorkGroupSize.xy * gl_WorkGroupID.xy); + + /* Copy level 0. */ + ivec2 src_px = ivec2(kernel_origin + local_px) * 2; + vec2 samp_co = (vec2(src_px) + 0.5) / vec2(textureSize(depth_tx, 0)); + vec4 samp = textureGather(depth_tx, samp_co); + + if (update_mip_0) { + imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx); + imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy); + imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz); + imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww); + } + + /* Level 1. (No load) */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_1, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + /* Level 2-5. 
*/ + bool active_thread; + int mask_shift = 1; + +#define downsample_level(out_mip__, lod_) \ + active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ + barrier(); /* Wait for previous writes to finish. */ \ + if (active_thread) { \ + max_depth = max_v4(load_local_depths(local_px)); \ + dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \ + imageStore(out_mip__, dst_px, vec4(max_depth)); \ + } \ + barrier(); /* Wait for previous reads to finish. */ \ + if (active_thread) { \ + store_local_depth(local_px, max_depth); \ + } \ + mask_shift++; + + downsample_level(out_mip_2, 2); + downsample_level(out_mip_3, 3); + downsample_level(out_mip_4, 4); + downsample_level(out_mip_5, 5); + + /* Since we pad the destination texture, the mip size is equal to the dispatch size. */ + uint tile_count = uint(imageSize(out_mip_5).x * imageSize(out_mip_5).y); + /* Let the last tile handle the remaining LOD. */ + bool last_tile = atomicAdd(finished_tile_counter, 1u) + 1u < tile_count; + if (last_tile == false) { + return; + } + finished_tile_counter = 0u; + + ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); + ivec2 image_border = imageSize(out_mip_5) - 1; + for (int y = 0; y < iter.y; y++) { + for (int x = 0; x < iter.x; x++) { + /* Load result of the other work groups. */ + kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); + src_px = ivec2(kernel_origin + local_px) * 2; + vec4 samp; + samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; + samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x; + samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x; + samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x; + /* Level 6. */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_6, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + mask_shift = 1; + + /* Level 7. 
*/ + downsample_level(out_mip_7, 7); + + /* Limited by OpenGL maximum of 8 image slot. */ + // downsample_level(out_mip_8, 8); + // downsample_level(out_mip_9, 9); + // downsample_level(out_mip_10, 10); + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl new file mode 100644 index 00000000000..eefc024d0b8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl @@ -0,0 +1,54 @@ + +/** + * Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots. + * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling + * pass is not conservative enough). + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth = texelFetch(hiz_tx, texel, 0).r; + float vP_z = get_view_z_from_depth(depth); + vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); + + float light_count = 0.0; + uint light_cull = 0u; + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + LightData light = light_buf[l_idx]; + light_cull |= 1u << l_idx; + light_count += 1.0; + } + LIGHT_FOREACH_END + + uint light_nocull = 0u; + LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(light_cull_buf, l_idx) + { + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + if (light_attenuation(light, L, dist) > 0.0) { + light_nocull |= 1u << l_idx; + } + } + LIGHT_FOREACH_END + + vec4 color = vec4(heatmap_gradient(light_count / 4.0), 1.0); + + if ((light_cull & light_nocull) != light_nocull) { + /* ERROR. Some lights were culled incorrectly. 
*/ + color = vec4(0.0, 1.0, 0.0, 1.0); + } + + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl new file mode 100644 index 00000000000..9c12b0e50e6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl @@ -0,0 +1,62 @@ + +/** + * Select the visible items inside the active view and put them inside the sorting buffer. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +void main() +{ + uint l_idx = gl_GlobalInvocationID.x; + if (l_idx >= light_cull_buf.items_count) { + return; + } + + LightData light = in_light_buf[l_idx]; + + /* Do not select 0 power lights. */ + if (light.influence_radius_max < 1e-8) { + return; + } + + /* Sun lights are packed at the end of the array. Perform early copy. */ + if (light.type == LIGHT_SUN) { + /* NOTE: We know the index because sun lights are packed at the start of the input buffer. */ + out_light_buf[light_cull_buf.local_lights_len + l_idx] = light; + return; + } + + Sphere sphere; + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + light._position, + light._position - light._back * light.influence_radius_max, + light._right * light.influence_radius_max * light.spot_tan / light.spot_size_inv.x, + light._up * light.influence_radius_max * light.spot_tan / light.spot_size_inv.y); + if (!intersect_view(pyramid)) { + return; + } + } + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + sphere = Sphere(light._position, light.influence_radius_max); + break; + } + + /* TODO(fclem): HiZ culling? 
Could be quite beneficial given the nature of the 2.5D culling. */ + + /* TODO(fclem): Small light culling / fading? */ + + if (intersect_view(sphere)) { + uint index = atomicAdd(light_cull_buf.visible_count, 1u); + + out_zdist_buf[index] = dot(cameraForward, light._position) - dot(cameraForward, cameraPos); + out_key_buf[index] = l_idx; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl new file mode 100644 index 00000000000..e98b170cd4c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl @@ -0,0 +1,57 @@ + +/** + * Sort the lights by their Z distance to the camera. + * Outputs ordered light buffer. + * One thread processes one Light entity. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float zdists_cache[gl_WorkGroupSize.x]; + +void main() +{ + uint src_index = gl_GlobalInvocationID.x; + bool valid_thread = true; + + if (src_index >= light_cull_buf.visible_count) { + /* Do not return because we use barriers later on (which need uniform control flow). + * Just process the same last item but avoid insertion. */ + src_index = light_cull_buf.visible_count - 1; + valid_thread = false; + } + + float local_zdist = in_zdist_buf[src_index]; + + int prefix_sum = 0; + /* Iterate over the whole key buffer. */ + uint iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < iter; i++) { + uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x; + /* NOTE: This will load duplicated values, but they will be discarded. */ + index = min(index, light_cull_buf.visible_count - 1); + zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index]; + + barrier(); + + /* Iterate over the cache line. 
*/ + uint line_end = min(gl_WorkGroupSize.x, light_cull_buf.visible_count - gl_WorkGroupSize.x * i); + for (uint j = 0u; j < line_end; j++) { + if (zdists_cache[j] < local_zdist) { + prefix_sum++; + } + else if (zdists_cache[j] == local_zdist) { + /* Same depth, use index to order and avoid same prefix for 2 different lights. */ + if ((gl_WorkGroupSize.x * i + j) < src_index) { + prefix_sum++; + } + } + } + } + + if (valid_thread) { + /* Copy sorted light to render light buffer. */ + uint input_index = in_key_buf[src_index]; + out_light_buf[prefix_sum] = in_light_buf[input_index]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl new file mode 100644 index 00000000000..37705e22b22 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -0,0 +1,188 @@ + +/** + * 2D Culling pass for lights. + * We iterate over all items and check if they intersect with the tile frustum. + * Dispatch one thread per word. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Culling shapes extraction + * \{ */ + +struct CullingTile { + IsectFrustum frustum; + vec4 bounds; +}; + +/* Corners are expected to be in viewspace so that the cone is starting from the origin. + * Corner order does not matter. 
*/ +vec4 tile_bound_cone(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + v00 = normalize(v00); + v01 = normalize(v01); + v10 = normalize(v10); + v11 = normalize(v11); + vec3 center = normalize(v00 + v01 + v10 + v11); + float angle_cosine = dot(center, v00); + angle_cosine = max(angle_cosine, dot(center, v01)); + angle_cosine = max(angle_cosine, dot(center, v10)); + angle_cosine = max(angle_cosine, dot(center, v11)); + return vec4(center, angle_cosine); +} + +/* Corners are expected to be in viewspace. Returns Z-aligned bounding cylinder. + * Corner order does not matter. */ +vec4 tile_bound_cylinder(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + vec3 center = (v00 + v01 + v10 + v11) * 0.25; + vec4 corners_dist; + float dist_sqr = distance_squared(center, v00); + dist_sqr = max(dist_sqr, distance_squared(center, v01)); + dist_sqr = max(dist_sqr, distance_squared(center, v10)); + dist_sqr = max(dist_sqr, distance_squared(center, v11)); + /* Return a cone. Later converted to cylinder. */ + return vec4(center, sqrt(dist_sqr)); +} + +vec2 tile_to_ndc(vec2 tile_co, vec2 offset) +{ + /* Add a margin to prevent culling too much if the frustum becomes too much unstable. */ + const float margin = 0.02; + tile_co += margin * (offset * 2.0 - 1.0); + + tile_co += offset; + return tile_co * light_cull_buf.tile_to_uv_fac * 2.0 - 1.0; +} + +CullingTile tile_culling_get(uvec2 tile_co) +{ + vec2 ftile = vec2(tile_co); + /* Culling frustum corners for this tile. */ + vec3 corners[8]; + /* Follow same corners order as view frustum. 
*/ + corners[1].xy = corners[0].xy = tile_to_ndc(ftile, vec2(0, 0)); + corners[5].xy = corners[4].xy = tile_to_ndc(ftile, vec2(1, 0)); + corners[6].xy = corners[7].xy = tile_to_ndc(ftile, vec2(1, 1)); + corners[2].xy = corners[3].xy = tile_to_ndc(ftile, vec2(0, 1)); + corners[1].z = corners[5].z = corners[6].z = corners[2].z = -1.0; + corners[0].z = corners[4].z = corners[7].z = corners[3].z = 1.0; + + for (int i = 0; i < 8; i++) { + /* Culling in view space for precision. */ + corners[i] = project_point(ProjectionMatrixInverse, corners[i]); + } + + bool is_persp = ProjectionMatrix[3][3] == 0.0; + CullingTile tile; + tile.bounds = (is_persp) ? tile_bound_cone(corners[0], corners[4], corners[7], corners[3]) : + tile_bound_cylinder(corners[0], corners[4], corners[7], corners[3]); + + tile.frustum = isect_data_setup(shape_frustum(corners)); + return tile; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Intersection Tests + * \{ */ + +bool intersect(CullingTile tile, Sphere sphere) +{ + bool isect = true; + /* Test tile intersection using bounding cone or bounding cylinder. + * This has less false positive cases when the sphere is large. */ + if (ProjectionMatrix[3][3] == 0.0) { + isect = intersect(shape_cone(tile.bounds.xyz, tile.bounds.w), sphere); + } + else { + /* Simplify to a 2D circle test on the view Z axis plane. */ + isect = intersect(shape_circle(tile.bounds.xy, tile.bounds.w), + shape_circle(sphere.center.xy, sphere.radius)); + } + /* Refine using frustum test. If the sphere is small it avoids intersection + * with a neighbor tile. 
*/ + if (isect) { + isect = intersect(tile.frustum, sphere); + } + return isect; +} + +bool intersect(CullingTile tile, Box bbox) +{ + return intersect(tile.frustum, bbox); +} + +bool intersect(CullingTile tile, Pyramid pyramid) +{ + return intersect(tile.frustum, pyramid); +} + +/** \} */ + +void main() +{ + uint word_idx = gl_GlobalInvocationID.x % light_cull_buf.tile_word_len; + uint tile_idx = gl_GlobalInvocationID.x / light_cull_buf.tile_word_len; + uvec2 tile_co = uvec2(tile_idx % light_cull_buf.tile_x_len, + tile_idx / light_cull_buf.tile_x_len); + + if (tile_co.y >= light_cull_buf.tile_y_len) { + return; + } + + /* TODO(fclem): We could stop the tile at the HiZ depth. */ + CullingTile tile = tile_culling_get(tile_co); + + uint l_idx = word_idx * 32u; + uint l_end = min(l_idx + 32u, light_cull_buf.visible_count); + uint word = 0u; + for (; l_idx < l_end; l_idx++) { + LightData light = light_buf[l_idx]; + + /* Culling in view space for precision and simplicity. */ + vec3 vP = transform_point(ViewMatrix, light._position); + vec3 v_right = transform_direction(ViewMatrix, light._right); + vec3 v_up = transform_direction(ViewMatrix, light._up); + vec3 v_back = transform_direction(ViewMatrix, light._back); + float radius = light.influence_radius_max; + + Sphere sphere = shape_sphere(vP, radius); + bool intersect_tile = intersect(tile, sphere); + + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + vP, + vP - v_back * radius, + v_right * radius * light.spot_tan / light.spot_size_inv.x, + v_up * radius * light.spot_tan / light.spot_size_inv.y); + intersect_tile = intersect_tile && intersect(tile, pyramid); + break; + } + /* Fallthrough to the hemispheric case. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + vec3 v000 = vP - v_right * radius - v_up * radius; + vec3 v100 = v000 + v_right * (radius * 2.0); + vec3 v010 = v000 + v_up * (radius * 2.0); + vec3 v001 = v000 - v_back * radius; + Box bbox = shape_box(v000, v100, v010, v001); + intersect_tile = intersect_tile && intersect(tile, bbox); + default: + break; + } + + if (intersect_tile) { + word |= 1u << (l_idx % 32u); + } + } + + out_light_tile_buf[gl_GlobalInvocationID.x] = word; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl new file mode 100644 index 00000000000..ae20153f26c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl @@ -0,0 +1,56 @@ + +/** + * Create the Zbins from Z-sorted lights. + * Perform min-max operation in LDS memory for speed. + * For this reason, we only dispatch 1 thread group. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* Fits the limit of 32KB. */ +shared uint zbin_max[CULLING_ZBIN_COUNT]; +shared uint zbin_min[CULLING_ZBIN_COUNT]; + +void main() +{ + const uint zbin_iter = CULLING_ZBIN_COUNT / gl_WorkGroupSize.x; + const uint zbin_local = gl_LocalInvocationID.x * zbin_iter; + + uint src_index = gl_GlobalInvocationID.x; + + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + zbin_max[l] = 0x0u; + zbin_min[l] = ~0x0u; + } + barrier(); + + uint light_iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < light_iter; i++) { + uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x; + if (index >= light_cull_buf.visible_count) { + continue; + } + vec3 P = light_buf[index]._position; + /* TODO(fclem): Could have better bounds for spot and area lights. 
*/ + float radius = light_buf[index].influence_radius_max; + float z_dist = dot(cameraForward, P) - dot(cameraForward, cameraPos); + int z_min = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist + radius); + int z_max = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist - radius); + z_min = clamp(z_min, 0, CULLING_ZBIN_COUNT - 1); + z_max = clamp(z_max, 0, CULLING_ZBIN_COUNT - 1); + /* Register to Z bins. */ + for (int z = z_min; z <= z_max; z++) { + atomicMin(zbin_min[z], index); + atomicMax(zbin_max[z], index); + } + } + barrier(); + + /* Write result to zbins buffer. Pack min & max into 1 uint. */ + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + out_zbin_buf[l] = (zbin_max[l] << 16u) | (zbin_min[l] & 0xFFFFu); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl new file mode 100644 index 00000000000..d4abdd43aa4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl @@ -0,0 +1,129 @@ + +/** + * The resources expected to be defined are: + * - light_buf + * - light_zbin_buf + * - light_cull_buf + * - light_tile_buf + * - shadow_atlas_tx + * - shadow_tilemaps_tx + * - sss_transmittance_tx + * - utility_tx + */ + +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl) + +/* TODO(fclem): We could reduce register pressure by only having static branches for sun lights. 
*/ +void light_eval_ex(ClosureDiffuse diffuse, + ClosureReflection reflection, + const bool is_directional, + vec3 P, + vec3 V, + float vP_z, + float thickness, + vec4 ltc_mat, + uint l_idx, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + + float visibility = light_attenuation(light, L, dist); + +#if 0 /* TODO(fclem): Shadows */ + if ((light.shadow_id != LIGHT_NO_SHADOW) && (visibility > 0.0)) { + vec3 lL = light_world_to_local(light, -L) * dist; + + float shadow_delta = shadow_delta_get( + shadow_atlas_tx, shadow_tilemaps_tx, light, light.shadow_data, lL, dist, P); + +# ifdef SSS_TRANSMITTANCE + /* Transmittance evaluation first to use initial visibility. */ + if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) { + float delta = max(thickness, shadow_delta); + + vec3 intensity = visibility * light.transmit_power * + light_translucent(sss_transmittance_tx, + is_directional, + light, + diffuse.N, + L, + dist, + diffuse.sss_radius, + delta); + out_diffuse += light.color * intensity; + } +# endif + + visibility *= float(shadow_delta - light.shadow_data.bias <= 0.0); + } +#endif + + if (visibility < 1e-6) { + return; + } + + if (light.diffuse_power > 0.0) { + float intensity = visibility * light.diffuse_power * + light_diffuse(utility_tx, is_directional, light, diffuse.N, V, L, dist); + out_diffuse += light.color * intensity; + } + + if (light.specular_power > 0.0) { + float intensity = visibility * light.specular_power * + light_ltc( + utility_tx, is_directional, light, reflection.N, V, L, dist, ltc_mat); + out_specular += light.color * intensity; + } +} + +void light_eval(ClosureDiffuse diffuse, + ClosureReflection reflection, + vec3 P, + vec3 V, + float vP_z, + float thickness, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + vec2 uv = vec2(reflection.roughness, safe_sqrt(1.0 - dot(reflection.N, V))); + uv = uv * UTIL_TEX_UV_SCALE + 
UTIL_TEX_UV_BIAS; + vec4 ltc_mat = utility_tx_sample(utility_tx, uv, UTIL_LTC_MAT_LAYER); + + LIGHT_FOREACH_BEGIN_DIRECTIONAL(light_cull_buf, l_idx) + { + light_eval_ex(diffuse, + reflection, + true, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END + + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + light_eval_ex(diffuse, + reflection, + false, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl new file mode 100644 index 00000000000..22a5f98e6c3 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl @@ -0,0 +1,72 @@ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) +{ + uint word_start = word_index * 32u; + uint word_end = word_start + 31u; + uint local_min = max(zbin_min, word_start); + uint local_max = min(zbin_max, word_end); + uint mask_width = local_max - local_min + 1; + return bit_field_mask(mask_width, local_min); +} + +int culling_z_to_zbin(float scale, float bias, float z) +{ + return int(z * scale + bias); +} + +/* Waiting to implement extensions support. 
 We need: + * - GL_KHR_shader_subgroup_ballot + * - GL_KHR_shader_subgroup_arithmetic + * or + * - Vulkan 1.1 + */ +#if 1 +# define subgroupMin(a) a +# define subgroupMax(a) a +# define subgroupOr(a) a +# define subgroupBroadcastFirst(a) a +#endif + +#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \ + { \ + { \ + for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) { + +#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \ + { \ + uvec2 tile_co = uvec2(_pixel / _culling.tile_size); \ + uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \ + _culling.tile_word_len; \ + int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \ + zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \ + uint zbin_data = _zbins[zbin_index]; \ + uint min_index = zbin_data & 0xFFFFu; \ + uint max_index = zbin_data >> 16u; \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ + max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ + /* Same as divide by 32 but avoids integer division. */ \ + uint word_min = min_index >> 5u; \ + uint word_max = max_index >> 5u; \ + for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \ + uint word = _words[tile_word_offset + word_idx]; \ + word &= zbin_mask(word_idx, min_index, max_index); \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + word = subgroupBroadcastFirst(subgroupOr(word)); \ + int bit_index; \ + while ((bit_index = findLSB(word)) != -1) { \ + word &= ~1u << uint(bit_index); \ + uint _item_index = word_idx * 32u + bit_index; + +/* No culling. Iterate over all items. 
*/ +#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \ + { \ + { \ + for (uint _item_index = 0; _item_index < _culling.visible_count; _item_index++) { + +#define LIGHT_FOREACH_END \ + } \ + } \ + } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl new file mode 100644 index 00000000000..58608f6e1f0 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl @@ -0,0 +1,209 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_ltc_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Light Functions + * \{ */ + +void light_vector_get(LightData ld, vec3 P, out vec3 L, out float dist) +{ + if (ld.type == LIGHT_SUN) { + L = ld._back; + dist = 1.0; + } + else { + L = ld._position - P; + dist = inversesqrt(len_squared(L)); + L *= dist; + dist = 1.0 / dist; + } +} + +/* Rotate vector to light's local space. Does not translate. */ +vec3 light_world_to_local(LightData ld, vec3 L) +{ + /* Avoid relying on compiler to optimize this. 
 + * vec3 lL = transpose(mat3(ld.object_mat)) * L; */ + vec3 lL; + lL.x = dot(ld.object_mat[0].xyz, L); + lL.y = dot(ld.object_mat[1].xyz, L); + lL.z = dot(ld.object_mat[2].xyz, L); + return lL; +} + +/* From Frostbite PBR Course + * Distance based attenuation + * http://www.frostbite.com/wp-content/uploads/2014/11/course_notes_moving_frostbite_to_pbr.pdf */ +float light_influence_attenuation(float dist, float inv_sqr_influence) +{ + float factor = sqr(dist) * inv_sqr_influence; + float fac = saturate(1.0 - sqr(factor)); + return sqr(fac); +} + +float light_spot_attenuation(LightData ld, vec3 L) +{ + vec3 lL = light_world_to_local(ld, L); + float ellipse = inversesqrt(1.0 + len_squared(lL.xy * ld.spot_size_inv / lL.z)); + float spotmask = smoothstep(0.0, 1.0, ellipse * ld._spot_mul + ld._spot_bias); + return spotmask; +} + +float light_attenuation(LightData ld, vec3 L, float dist) +{ + float vis = 1.0; + if (ld.type == LIGHT_SPOT) { + vis *= light_spot_attenuation(ld, L); + } + if (ld.type >= LIGHT_SPOT) { + vis *= step(0.0, -dot(L, -ld._back)); + } + if (ld.type != LIGHT_SUN) { +#ifdef VOLUME_LIGHTING + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_volume); +#else + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_surface); +#endif + } + return vis; +} + +/* Cheaper alternative than evaluating the LTC. + * The result needs to be multiplied by BSDF or Phase Function. */ +float light_point_light(LightData ld, const bool is_directional, vec3 L, float dist) +{ + if (is_directional) { + return 1.0; + } + /** + * Using "Point Light Attenuation Without Singularity" from Cem Yuksel + * http://www.cemyuksel.com/research/pointlightattenuation/pointlightattenuation.pdf + * http://www.cemyuksel.com/research/pointlightattenuation/ + **/ + float d_sqr = sqr(dist); + float r_sqr = ld.radius_squared; + /* Using a reformulation that has better numerical precision. 
*/ + float power = 2.0 / (d_sqr + r_sqr + dist * sqrt(d_sqr + r_sqr)); + + if (is_area_light(ld.type)) { + /* Modulate by light plane orientation / solid angle. */ + power *= saturate(dot(ld._back, L)); + } + return power; +} + +float light_diffuse(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist) +{ + if (is_directional || !is_area_light(ld.type)) { + float radius = ld._radius / dist; + return ltc_evaluate_disk_simple(utility_tx, radius, dot(N, L)); + } + else if (ld.type == LIGHT_RECT) { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; + corners[3] = -corners[1]; + + corners[0] = normalize(L * dist + corners[0]); + corners[1] = normalize(L * dist + corners[1]); + corners[2] = normalize(L * dist + corners[2]); + corners[3] = normalize(L * dist + corners[3]); + + return ltc_evaluate_quad(utility_tx, corners, N); + } + else /* (ld.type == LIGHT_ELLIPSE) */ { + vec3 points[3]; + points[0] = ld._right * -ld._area_size_x + ld._up * -ld._area_size_y; + points[1] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist; + + return ltc_evaluate_disk(utility_tx, N, V, mat3(1.0), points); + } +} + +float light_ltc(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist, + vec4 ltc_mat) +{ + if (is_directional || ld.type != LIGHT_RECT) { + vec3 Px = ld._right; + vec3 Py = ld._up; + + if (is_directional || !is_area_light(ld.type)) { + make_orthonormal_basis(L, Px, Py); + } + + vec3 points[3]; + points[0] = Px * -ld._area_size_x + Py * -ld._area_size_y; + points[1] = Px * ld._area_size_x + Py * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist; 
+ + return ltc_evaluate_disk(utility_tx, N, V, ltc_matrix(ltc_mat), points); + } + else { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; + corners[3] = -corners[1]; + + corners[0] += L * dist; + corners[1] += L * dist; + corners[2] += L * dist; + corners[3] += L * dist; + + ltc_transform_quad(N, V, ltc_matrix(ltc_mat), corners); + + return ltc_evaluate_quad(utility_tx, corners, vec3(0.0, 0.0, 1.0)); + } +} + +vec3 light_translucent(sampler1D transmittance_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 L, + float dist, + vec3 sss_radius, + float delta) +{ + /* TODO(fclem): We should compute the power at the entry point. */ + /* NOTE(fclem): we compute the light attenuation using the light vector but the transmittance + * using the shadow depth delta. */ + float power = light_point_light(ld, is_directional, L, dist); + /* Do not add more energy on front faces. Also apply lambertian BSDF. */ + power *= max(0.0, dot(-N, L)) * M_1_PI; + + sss_radius *= SSS_TRANSMIT_LUT_RADIUS; + vec3 channels_co = saturate(delta / sss_radius) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS; + + vec3 translucency; + translucency.x = (sss_radius.x > 0.0) ? texture(transmittance_tx, channels_co.x).r : 0.0; + translucency.y = (sss_radius.y > 0.0) ? texture(transmittance_tx, channels_co.y).r : 0.0; + translucency.z = (sss_radius.z > 0.0) ? texture(transmittance_tx, channels_co.z).r : 0.0; + return translucency * power; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl new file mode 100644 index 00000000000..57e92b0b9b4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl @@ -0,0 +1,299 @@ + +/** + * Adapted from : + * Real-Time Polygonal-Light Shading with Linearly Transformed Cosines. 
+ * Eric Heitz, Jonathan Dupuy, Stephen Hill and David Neubelt. + * ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2016) 35(4), 2016. + * Project page: https://eheitzresearch.wordpress.com/415-2/ + */ + +/* Diffuse *clipped* sphere integral. */ +float ltc_diffuse_sphere_integral(sampler2DArray utility_tx, float avg_dir_z, float form_factor) +{ +#if 1 + /* use tabulated horizon-clipped sphere */ + vec2 uv = vec2(avg_dir_z * 0.5 + 0.5, form_factor); + uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS; + + return texture(utility_tx, vec3(uv, UTIL_DISK_INTEGRAL_LAYER))[UTIL_DISK_INTEGRAL_COMP]; +#else + /* Cheap approximation. Less smooth and have energy issues. */ + return max((form_factor * form_factor + avg_dir_z) / (form_factor + 1.0), 0.0); +#endif +} + +/** + * An extended version of the implementation from + * "How to solve a cubic equation, revisited" + * http://momentsingraphics.de/?p=105 + */ +vec3 ltc_solve_cubic(vec4 coefs) +{ + /* Normalize the polynomial */ + coefs.xyz /= coefs.w; + /* Divide middle coefficients by three */ + coefs.yz /= 3.0; + + float A = coefs.w; + float B = coefs.z; + float C = coefs.y; + float D = coefs.x; + + /* Compute the Hessian and the discriminant */ + vec3 delta = vec3(-coefs.zy * coefs.zz + coefs.yx, dot(vec2(coefs.z, -coefs.y), coefs.xy)); + + /* Discriminant */ + float discr = dot(vec2(4.0 * delta.x, -delta.y), delta.zy); + + /* Clamping avoid NaN output on some platform. 
(see T67060) */ + float sqrt_discr = sqrt(clamp(discr, 0.0, FLT_MAX)); + + vec2 xlc, xsc; + + /* Algorithm A */ + { + float A_a = 1.0; + float C_a = delta.x; + float D_a = -2.0 * B * delta.x + delta.y; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(sqrt_discr, -D_a) / 3.0; + + float _2_sqrt_C_a = 2.0 * sqrt(-C_a); + float x_1a = _2_sqrt_C_a * cos(theta); + float x_3a = _2_sqrt_C_a * cos(theta + (2.0 / 3.0) * M_PI); + + float xl; + if ((x_1a + x_3a) > 2.0 * B) { + xl = x_1a; + } + else { + xl = x_3a; + } + + xlc = vec2(xl - B, A); + } + + /* Algorithm D */ + { + float A_d = D; + float C_d = delta.z; + float D_d = -D * delta.y + 2.0 * C * delta.z; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(D * sqrt_discr, -D_d) / 3.0; + + float _2_sqrt_C_d = 2.0 * sqrt(-C_d); + float x_1d = _2_sqrt_C_d * cos(theta); + float x_3d = _2_sqrt_C_d * cos(theta + (2.0 / 3.0) * M_PI); + + float xs; + if (x_1d + x_3d < 2.0 * C) { + xs = x_1d; + } + else { + xs = x_3d; + } + + xsc = vec2(-D, xs + C); + } + + float E = xlc.y * xsc.y; + float F = -xlc.x * xsc.y - xlc.y * xsc.x; + float G = xlc.x * xsc.x; + + vec2 xmc = vec2(C * F - B * G, -B * F + C * E); + + vec3 root = vec3(xsc.x / xsc.y, xmc.x / xmc.y, xlc.x / xlc.y); + + if (root.x < root.y && root.x < root.z) { + root.xyz = root.yxz; + } + else if (root.z < root.x && root.z < root.y) { + root.xyz = root.xzy; + } + + return root; +} + +/* from Real-Time Area Lighting: a Journey from Research to Production + * Stephen Hill and Eric Heitz */ +vec3 ltc_edge_integral_vec(vec3 v1, vec3 v2) +{ + float x = dot(v1, v2); + float y = abs(x); + + float a = 0.8543985 + (0.4965155 + 0.0145206 * y) * y; + float b = 3.4175940 + (4.1616724 + y) * y; + float v = a / b; + + float theta_sintheta = (x > 0.0) ? v : 0.5 * inversesqrt(max(1.0 - x * x, 1e-7)) - v; + + return cross(v1, v2) * theta_sintheta; +} + +mat3 ltc_matrix(vec4 lut) +{ + /* Load inverse matrix. 
*/ + return mat3(vec3(lut.x, 0, lut.y), vec3(0, 1, 0), vec3(lut.z, 0, lut.w)); +} + +void ltc_transform_quad(vec3 N, vec3 V, mat3 Minv, inout vec3 corners[4]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */ + V = normalize(V + 1e-8); + + /* Construct orthonormal basis around N. */ + vec3 T1, T2; + T1 = normalize(V - N * dot(N, V)); + T2 = cross(N, T1); + + /* Rotate area light in (T1, T2, R) basis. */ + Minv = Minv * transpose(mat3(T1, T2, N)); + + /* Apply LTC inverse matrix. */ + corners[0] = normalize(Minv * corners[0]); + corners[1] = normalize(Minv * corners[1]); + corners[2] = normalize(Minv * corners[2]); + corners[3] = normalize(Minv * corners[3]); +} + +/* If corners have already pass through ltc_transform_quad(), + * then N **MUST** be vec3(0.0, 0.0, 1.0), corresponding to the Up axis of the shading basis. */ +float ltc_evaluate_quad(sampler2DArray utility_tx, vec3 corners[4], vec3 N) +{ + /* Approximation using a sphere of the same solid angle than the quad. + * Finding the clipped sphere diffuse integral is easier than clipping the quad. */ + vec3 avg_dir; + avg_dir = ltc_edge_integral_vec(corners[0], corners[1]); + avg_dir += ltc_edge_integral_vec(corners[1], corners[2]); + avg_dir += ltc_edge_integral_vec(corners[2], corners[3]); + avg_dir += ltc_edge_integral_vec(corners[3], corners[0]); + + float form_factor = length(avg_dir); + float avg_dir_z = dot(N, avg_dir / form_factor); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir_z, form_factor); +} + +/* If disk does not need to be transformed and is already front facing. 
*/ +float ltc_evaluate_disk_simple(sampler2DArray utility_tx, float disk_radius, float NL) +{ + float r_sqr = disk_radius * disk_radius; + float one_r_sqr = 1.0 + r_sqr; + float form_factor = r_sqr * inversesqrt(one_r_sqr * one_r_sqr); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, NL, form_factor); +} + +/* disk_points are WS vectors from the shading point to the disk "bounding domain" */ +float ltc_evaluate_disk(sampler2DArray utility_tx, vec3 N, vec3 V, mat3 Minv, vec3 disk_points[3]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */ + V = normalize(V + 1e-8); + + /* construct orthonormal basis around N */ + vec3 T1, T2; + T1 = normalize(V - N * dot(V, N)); + T2 = cross(N, T1); + + /* rotate area light in (T1, T2, R) basis */ + mat3 R = transpose(mat3(T1, T2, N)); + + /* Intermediate step: init ellipse. */ + vec3 L_[3]; + L_[0] = mul(R, disk_points[0]); + L_[1] = mul(R, disk_points[1]); + L_[2] = mul(R, disk_points[2]); + + vec3 C = 0.5 * (L_[0] + L_[2]); + vec3 V1 = 0.5 * (L_[1] - L_[2]); + vec3 V2 = 0.5 * (L_[1] - L_[0]); + + /* Transform ellipse by Minv. */ + C = Minv * C; + V1 = Minv * V1; + V2 = Minv * V2; + + /* Compute eigenvectors of new ellipse. */ + + float d11 = dot(V1, V1); + float d22 = dot(V2, V2); + float d12 = dot(V1, V2); + float a, b; /* Eigenvalues */ + const float threshold = 0.0007; /* Can be adjusted. Fix artifacts. 
*/ + if (abs(d12) / sqrt(d11 * d22) > threshold) { + float tr = d11 + d22; + float det = -d12 * d12 + d11 * d22; + + /* use sqrt matrix to solve for eigenvalues */ + det = sqrt(det); + float u = 0.5 * sqrt(tr - 2.0 * det); + float v = 0.5 * sqrt(tr + 2.0 * det); + float e_max = (u + v); + float e_min = (u - v); + e_max *= e_max; + e_min *= e_min; + + vec3 V1_, V2_; + if (d11 > d22) { + V1_ = d12 * V1 + (e_max - d11) * V2; + V2_ = d12 * V1 + (e_min - d11) * V2; + } + else { + V1_ = d12 * V2 + (e_max - d22) * V1; + V2_ = d12 * V2 + (e_min - d22) * V1; + } + + a = 1.0 / e_max; + b = 1.0 / e_min; + V1 = normalize(V1_); + V2 = normalize(V2_); + } + else { + a = 1.0 / d11; + b = 1.0 / d22; + V1 *= sqrt(a); + V2 *= sqrt(b); + } + + /* Now find front facing ellipse with same solid angle. */ + + vec3 V3 = normalize(cross(V1, V2)); + if (dot(C, V3) < 0.0) { + V3 *= -1.0; + } + + float L = dot(V3, C); + float inv_L = 1.0 / L; + float x0 = dot(V1, C) * inv_L; + float y0 = dot(V2, C) * inv_L; + + float L_sqr = L * L; + a *= L_sqr; + b *= L_sqr; + + float t = 1.0 + x0 * x0; + float c0 = a * b; + float c1 = c0 * (t + y0 * y0) - a - b; + float c2 = (1.0 - a * t) - b * (1.0 + y0 * y0); + float c3 = 1.0; + + vec3 roots = ltc_solve_cubic(vec4(c0, c1, c2, c3)); + float e1 = roots.x; + float e2 = roots.y; + float e3 = roots.z; + + vec3 avg_dir = vec3(a * x0 / (a - e2), b * y0 / (b - e2), 1.0); + + mat3 rotate = mat3(V1, V2, V3); + + avg_dir = rotate * avg_dir; + avg_dir = normalize(avg_dir); + + /* L1, L2 are the extends of the front facing ellipse. */ + float L1 = sqrt(-e2 / e3); + float L2 = sqrt(-e2 / e1); + + /* Find the sphere and compute lighting. 
*/ + float form_factor = max(0.0, L1 * L2 * inversesqrt((1.0 + L1 * L1) * (1.0 + L2 * L2))); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir.z, form_factor); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl new file mode 100644 index 00000000000..07139ea6a09 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl @@ -0,0 +1,115 @@ + +/** + * Dilate motion vector tiles until we covered maximum velocity. + * Outputs the largest intersecting motion vector in the neighborhood. + */ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl) + +#define DEBUG_BYPASS_DILATION 0 + +struct MotionRect { + ivec2 bottom_left; + ivec2 extent; +}; + +MotionRect compute_motion_rect(ivec2 tile, vec2 motion) +{ +#if DEBUG_BYPASS_DILATION + return MotionRect(tile, ivec2(1)); +#endif + /* Ceil to number of tile touched. */ + ivec2 point1 = tile + ivec2(sign(motion) * ceil(abs(motion) / float(MOTION_BLUR_TILE_SIZE))); + ivec2 point2 = tile; + + ivec2 max_point = max(point1, point2); + ivec2 min_point = min(point1, point2); + /* Clamp to bounds. */ + max_point = min(max_point, imageSize(in_tiles_img) - 1); + min_point = max(min_point, ivec2(0)); + + MotionRect rect; + rect.bottom_left = min_point; + rect.extent = 1 + max_point - min_point; + return rect; +} + +struct MotionLine { + /** Origin of the line. */ + vec2 origin; + /** Normal to the line direction. */ + vec2 normal; +}; + +MotionLine compute_motion_line(ivec2 tile, vec2 motion) +{ + vec2 dir = safe_normalize(motion); + + MotionLine line; + line.origin = vec2(tile); + /* Rotate 90° Counter-Clockwise. 
*/ + line.normal = vec2(-dir.y, dir.x); + return line; +} + +bool is_inside_motion_line(ivec2 tile, MotionLine motion_line) +{ +#if DEBUG_BYPASS_DILATION + return true; +#endif + /* NOTE: Everything here is in tile units. */ + float dist = point_line_projection_dist(vec2(tile), motion_line.origin, motion_line.normal); + /* In order to be conservative and for simplicity, we use the tiles' bounding circles. + * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */ + return abs(dist) < M_SQRT2; +} + +void main() +{ + ivec2 src_tile = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(src_tile, imageSize(in_tiles_img)))) { + return; + } + + vec4 max_motion = imageLoad(in_tiles_img, src_tile); + + MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile); + MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + if (true) { + /* Rectangular area (in tiles) where the motion vector spreads. */ + MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy); + MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy); + /* Do a conservative rasterization of the motion vector line. */ + for (int x = 0; x < motion_rect.extent.x; x++) { + for (int y = 0; y < motion_rect.extent.y; y++) { + ivec2 tile = motion_rect.bottom_left + ivec2(x, y); + if (is_inside_motion_line(tile, motion_line)) { + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + /* FIXME: This is a bit weird, but for some reason, we need to store the same vector in + * the motion next so that weighting in gather pass is better. */ + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + } + } + } + } + + if (true) { + MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + /* Rectangular area (in tiles) where the motion vector spreads. 
*/ + MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw); + MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw); + /* Do a conservative rasterization of the motion vector line. */ + for (int x = 0; x < motion_rect.extent.x; x++) { + for (int y = 0; y < motion_rect.extent.y; y++) { + ivec2 tile = motion_rect.bottom_left + ivec2(x, y); + if (is_inside_motion_line(tile, motion_line)) { + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + /* FIXME: This is a bit weird, but for some reason, we need to store the same vector in + * the motion next so that weighting in gather pass is better. */ + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + } + } + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl new file mode 100644 index 00000000000..cbbeea25d20 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl @@ -0,0 +1,103 @@ + +/** + * Shader that down-samples the velocity buffer into squared tiles of MB_TILE_DIVISOR pixels wide. + * Outputs the largest motion vector in the tile area. + * Also performs velocity resolve to speed up the convolution pass. + * + * Based on: + * A Fast and Stable Feature-Aware Motion Blur Filter + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + * + * Adapted from G3D Innovation Engine implementation. + */ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) + +shared uint payload_prev; +shared uint payload_next; +shared vec2 max_motion_prev; +shared vec2 max_motion_next; + +/* Store velocity magnitude in the MSB and thread id in the LSB. */ +uint pack_payload(vec2 motion, uvec2 thread_id) +{ + /* NOTE: We clamp max velocity to 16k pixels. 
*/ + return (min(uint(ceil(length(motion))), 0xFFFFu) << 16u) | (thread_id.y << 8) | thread_id.x; +} + +/* Return thread index from the payload. */ +uvec2 unpack_payload(uint payload) +{ + return uvec2(payload & 0xFFu, (payload >> 8) & 0xFFu); +} + +void main() +{ + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + payload_prev = 0u; + payload_next = 0u; + } + barrier(); + + uint local_payload_prev = 0u; + uint local_payload_next = 0u; + vec2 local_max_motion_prev; + vec2 local_max_motion_next; + + ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(velocity_img) - 1); + + vec2 render_size = vec2(imageSize(velocity_img).xy); + vec2 uv = (vec2(texel) + 0.5) / render_size; + float depth = texelFetch(depth_tx, texel, 0).r; + vec4 motion = velocity_resolve(imageLoad(velocity_img, texel), uv, depth); +#ifdef FLATTEN_VIEWPORT + /* imageLoad does not perform the swizzling like sampler does. Do it manually. */ + motion = motion.xyxy; +#endif + + /* Store resolved velocity to speedup the gather pass. Out of bounds writes are ignored. + * Unfortunately, we cannot convert to pixel space here since it is also used by TAA and the + * motion blur needs to remain optional. */ + imageStore(velocity_img, ivec2(gl_GlobalInvocationID.xy), velocity_pack(motion)); + /* Clip velocity to viewport bounds (in NDC space). */ + vec2 line_clip; + line_clip.x = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, motion.xy * 2.0); + line_clip.y = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, -motion.zw * 2.0); + motion *= min(line_clip, vec2(1.0)).xxyy; + /* Convert to pixel space. Note this is only for velocity tiles. */ + motion *= render_size.xyxy; + /* Rescale to shutter relative motion for viewport. 
*/ + motion *= motion_blur_buf.motion_scale.xxyy; + + uint sample_payload_prev = pack_payload(motion.xy, gl_LocalInvocationID.xy); + if (local_payload_prev < sample_payload_prev) { + local_payload_prev = sample_payload_prev; + local_max_motion_prev = motion.xy; + } + + uint sample_payload_next = pack_payload(motion.zw, gl_LocalInvocationID.xy); + if (local_payload_next < sample_payload_next) { + local_payload_next = sample_payload_next; + local_max_motion_next = motion.zw; + } + + /* Compare the local payload with the other threads. */ + atomicMax(payload_prev, local_payload_prev); + atomicMax(payload_next, local_payload_next); + barrier(); + + /* Need to broadcast the result to another thread in order to issue a unique write. */ + if (all(equal(unpack_payload(payload_prev), gl_LocalInvocationID.xy))) { + max_motion_prev = local_max_motion_prev; + } + if (all(equal(unpack_payload(payload_next), gl_LocalInvocationID.xy))) { + max_motion_next = local_max_motion_next; + } + barrier(); + + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + ivec2 tile_co = ivec2(gl_WorkGroupID.xy); + imageStore(out_tiles_img, tile_co, vec4(max_motion_prev, max_motion_next)); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl new file mode 100644 index 00000000000..5249e6637b6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl @@ -0,0 +1,221 @@ + +/** + * Perform two gather blur in the 2 motion blur directions + * Based on: + * A Fast and Stable Feature-Aware Motion Blur Filter + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + * + * With modification from the presentation: + * Next Generation Post Processing in Call of Duty Advanced Warfare + * by Jorge Jimenez + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma 
BLENDER_REQUIRE(eevee_sampling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl) + +const int gather_sample_count = 8; + +/* Converts uv velocity into pixel space. Assumes velocity_tx is the same resolution as the + * target post-fx framebuffer. */ +vec4 motion_blur_sample_velocity(sampler2D velocity_tx, vec2 uv) +{ + /* We can load velocity without velocity_resolve() since we resolved during the flatten pass. */ + vec4 velocity = velocity_unpack(texture(velocity_tx, uv)); + return velocity * vec2(textureSize(velocity_tx, 0)).xyxy * motion_blur_buf.motion_scale.xxyy; +} + +vec2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length) +{ + return saturate(vec2(center_motion_length, sample_motion_length) - offset_length + 1.0); +} + +vec2 depth_compare(float center_depth, float sample_depth) +{ + vec2 depth_scale = vec2(-motion_blur_buf.depth_scale, motion_blur_buf.depth_scale); + return saturate(0.5 + depth_scale * (sample_depth - center_depth)); +} + +/* Kill contribution if not going the same direction. */ +float dir_compare(vec2 offset, vec2 sample_motion, float sample_motion_length) +{ + if (sample_motion_length < 0.5) { + return 1.0; + } + return (dot(offset, sample_motion) > 0.0) ? 1.0 : 0.0; +} + +/* Return background (x) and foreground (y) weights. */ +vec2 sample_weights(float center_depth, + float sample_depth, + float center_motion_length, + float sample_motion_length, + float offset_length) +{ + /* Classify foreground/background. */ + vec2 depth_weight = depth_compare(center_depth, sample_depth); + /* Weight if sample is overlapping or under the center pixel. */ + vec2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length); + return depth_weight * spread_weight; +} + +struct Accumulator { + vec4 fg; + vec4 bg; + /** x: Background, y: Foreground, z: dir. 
*/ + vec3 weight; +}; + +void gather_sample(vec2 screen_uv, + float center_depth, + float center_motion_len, + vec2 offset, + float offset_len, + const bool next, + inout Accumulator accum) +{ + vec2 sample_uv = screen_uv - offset * motion_blur_buf.target_size_inv; + vec4 sample_vectors = motion_blur_sample_velocity(velocity_tx, sample_uv); + vec2 sample_motion = (next) ? sample_vectors.zw : sample_vectors.xy; + float sample_motion_len = length(sample_motion); + float sample_depth = texture(depth_tx, sample_uv).r; + vec4 sample_color = textureLod(in_color_tx, sample_uv, 0.0); + + sample_depth = get_view_z_from_depth(sample_depth); + + vec3 weights; + weights.xy = sample_weights( + center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len); + weights.z = dir_compare(offset, sample_motion, sample_motion_len); + weights.xy *= weights.z; + + accum.fg += sample_color * weights.y; + accum.bg += sample_color * weights.x; + accum.weight += weights; +} + +void gather_blur(vec2 screen_uv, + vec2 center_motion, + float center_depth, + vec2 max_motion, + float ofs, + const bool next, + inout Accumulator accum) +{ + float center_motion_len = length(center_motion); + float max_motion_len = length(max_motion); + + /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel. + * Fix this by overriding the max_motion. */ + if (max_motion_len < center_motion_len) { + max_motion_len = center_motion_len; + max_motion = center_motion; + } + + if (max_motion_len < 0.5) { + return; + } + + int i; + float t, inc = 1.0 / float(gather_sample_count); + for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) { + gather_sample(screen_uv, + center_depth, + center_motion_len, + max_motion * t, + max_motion_len * t, + next, + accum); + } + + if (center_motion_len < 0.5) { + return; + } + + for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) { + /* Also sample in center motion direction. 
+ * Allow recovering motion where there is conflicting + * motion between foreground and background. */ + gather_sample(screen_uv, + center_depth, + center_motion_len, + center_motion * t, + center_motion_len * t, + next, + accum); + } +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(depth_tx, 0).xy); + + if (!in_texture_range(texel, depth_tx)) { + return; + } + + /* Data of the center pixel of the gather (target). */ + float center_depth = get_view_z_from_depth(texelFetch(depth_tx, texel, 0).r); + vec4 center_motion = motion_blur_sample_velocity(velocity_tx, uv); + + vec4 center_color = textureLod(in_color_tx, uv, 0.0); + + float noise_offset = sampling_rng_1D_get(SAMPLING_TIME); + /** TODO(fclem) Blue noise. */ + vec2 rand = vec2(interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 0, noise_offset), + interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 1, noise_offset)); + + /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile. + * Note this randomize only in one direction but in practice it's enough. */ + rand.x = rand.x * 2.0 - 1.0; + ivec2 tile = (texel + ivec2(rand.x * float(MOTION_BLUR_TILE_SIZE) * 0.25)) / + MOTION_BLUR_TILE_SIZE; + tile = clamp(tile, ivec2(0), imageSize(in_tiles_img) - 1); + /* NOTE: Tile velocity is already in pixel space and with correct zw sign. */ + vec4 max_motion; + /* Load dilation result from the indirection table. */ + ivec2 tile_prev; + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev); + max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy; + ivec2 tile_next; + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next); + max_motion.zw = imageLoad(in_tiles_img, tile_next).zw; + + Accumulator accum; + accum.weight = vec3(0.0, 0.0, 1.0); + accum.bg = vec4(0.0); + accum.fg = vec4(0.0); + /* First linear gather. 
time = [T - delta, T] */ + gather_blur(uv, center_motion.xy, center_depth, max_motion.xy, rand.y, false, accum); + /* Second linear gather. time = [T, T + delta] */ + gather_blur(uv, center_motion.zw, center_depth, max_motion.zw, rand.y, true, accum); + +#if 1 /* Own addition. Not present in reference implementation. */ + /* Avoid division by 0.0. */ + float w = 1.0 / (50.0 * float(gather_sample_count) * 4.0); + accum.bg += center_color * w; + accum.weight.x += w; + /* NOTE: In Jimenez's presentation, they used center sample. + * We use background color as it contains more information for foreground + * elements that have not enough weights. + * Yield better blur in complex motion. */ + center_color = accum.bg / accum.weight.x; +#endif + /* Merge background. */ + accum.fg += accum.bg; + accum.weight.y += accum.weight.x; + /* Balance accumulation for failed samples. + * We replace the missing foreground by the background. */ + float blend_fac = saturate(1.0 - accum.weight.y / accum.weight.z); + vec4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac; + +#if 0 /* For debugging. */ + out_color.rgb = out_color.ggg; + out_color.rg += max_motion.xy; +#endif + + imageStore(out_color_img, texel, out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl new file mode 100644 index 00000000000..436fd01795a --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl @@ -0,0 +1,48 @@ + + +/* -------------------------------------------------------------------- */ +/** \name Tile indirection packing + * \{ */ + +#define MotionPayload uint + +/* Store velocity magnitude in the MSB to be able to use it with atomicMax operations. */ +MotionPayload motion_blur_tile_indirection_pack_payload(vec2 motion, uvec2 payload) +{ + /* NOTE: Clamp to 16383 pixel velocity. 
After that, it is tile position that determine the tile + * to dilate over. */ + uint velocity = min(uint(ceil(length(motion))), 0x3FFFu); + /* Designed for 512x512 tiles max. */ + return (velocity << 18u) | ((payload.x & 0x1FFu) << 9u) | (payload.y & 0x1FFu); +} + +/* Return thread index. */ +ivec2 motion_blur_tile_indirection_pack_payload(uint data) +{ + return ivec2((data >> 9u) & 0x1FFu, data & 0x1FFu); +} + +uint motion_blur_tile_indirection_index(uint motion_step, uvec2 tile) +{ + uint index = tile.x; + index += tile.y * MOTION_BLUR_MAX_TILE; + index += motion_step * MOTION_BLUR_MAX_TILE * MOTION_BLUR_MAX_TILE; + return index; +} + +#define MOTION_PREV 0u +#define MOTION_NEXT 1u + +#define motion_blur_tile_indirection_store(table_, step_, tile, payload_) \ + if (true) { \ + uint index = motion_blur_tile_indirection_index(step_, tile); \ + atomicMax(table_[index], payload_); \ + } + +#define motion_blur_tile_indirection_load(table_, step_, tile_, result_) \ + if (true) { \ + uint index = motion_blur_tile_indirection_index(step_, tile_); \ + result_ = motion_blur_tile_indirection_pack_payload(table_[index]); \ + } + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl index 0ccf06a9e14..dd047709afd 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl @@ -39,6 +39,8 @@ bool closure_select(float weight, inout float total_weight, inout float r) destination = candidate; \ } +float g_closure_rand; + void closure_weights_reset() { g_diffuse_data.weight = 0.0; @@ -58,18 +60,8 @@ void closure_weights_reset() g_refraction_data.roughness = 0.0; g_refraction_data.ior = 0.0; - /* TEMP */ -#define P(x) ((x + 0.5) / 16.0) - const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)), - vec4(P(12.0), P(4.0), P(14.0), P(6.0)), - vec4(P(3.0), 
P(11.0), P(1.0), P(9.0)), - vec4(P(15.0), P(7.0), P(13.0), P(5.0))); -#undef P #if defined(GPU_FRAGMENT_SHADER) - ivec2 pix = ivec2(gl_FragCoord.xy) % ivec2(4); - g_diffuse_rand = dither_mat4x4[pix.x][pix.y]; - g_reflection_rand = dither_mat4x4[pix.x][pix.y]; - g_refraction_rand = dither_mat4x4[pix.x][pix.y]; + g_diffuse_rand = g_reflection_rand = g_refraction_rand = g_closure_rand; #else g_diffuse_rand = 0.0; g_reflection_rand = 0.0; @@ -245,6 +237,20 @@ float F_eta(float a, float b) } void output_aov(vec4 color, float value, uint hash) { +#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER) + for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) { + if (aov_buf.hash_color[i] == hash) { + imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color); + return; + } + } + for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) { + if (aov_buf.hash_value[i] == hash) { + imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value)); + return; + } + } +#endif } #ifdef EEVEE_MATERIAL_STUBS @@ -255,6 +261,10 @@ void output_aov(vec4 color, float value, uint hash) # define nodetree_thickness() 0.1 #endif +#ifdef GPU_VERTEX_SHADER +# define closure_to_rgba(a) vec4(0.0) +#endif + /* -------------------------------------------------------------------- */ /** \name Fragment Displacement * @@ -359,3 +369,71 @@ vec3 coordinate_incoming(vec3 P) } /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Volume Attribute post + * + * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on + * the engine side. But as of now, the engines are responsible for loading the attributes. + * + * \{ */ + +#if defined(MAT_GEOM_VOLUME) + +float attr_load_temperature_post(float attr) +{ + /* Bring the into standard range without having to modify the grid values */ + attr = (attr > 0.01) ? 
(attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0; + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + /* Density is premultiplied for interpolation, divide it out here. */ + attr.rgb *= safe_rcp(attr.a); + attr.rgb *= drw_volume.color_mul.rgb; + attr.a = 1.0; + return attr; +} + +#else /* Noop for any other surface. */ + +float attr_load_temperature_post(float attr) +{ + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + return attr; +} + +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Uniform Attributes + * + * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on + * the engine side. But as of now, the engines are responsible for loading the attributes. + * + * \{ */ + +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ +#if defined(OBATTR_LIB) + uint index = floatBitsToUint(ObjectAttributeStart); + for (uint i = 0; i < floatBitsToUint(ObjectAttributeLen); i++, index++) { + if (drw_attrs[index].hash_code == attr_hash) { + return vec4(drw_attrs[index].data_x, + drw_attrs[index].data_y, + drw_attrs[index].data_z, + drw_attrs[index].data_w); + } + } + return vec4(0.0); +#else + return attr; +#endif +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl new file mode 100644 index 00000000000..0eea4a5ff33 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl @@ -0,0 +1,104 @@ + +/** + * Sampling data accessors and random number generators. + * Also contains some sample mapping functions. + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Sampling data. + * + * Return a random values from Low Discrepancy Sequence in [0..1) range. 
+ * This value is uniform (constant) for the whole scene sample. + * You might want to couple it with a noise function. + * \{ */ + +#ifdef EEVEE_SAMPLING_DATA + +float sampling_rng_1D_get(const eSamplingDimension dimension) +{ + return sampling_buf.dimensions[dimension]; +} + +vec2 sampling_rng_2D_get(const eSamplingDimension dimension) +{ + return vec2(sampling_buf.dimensions[dimension], sampling_buf.dimensions[dimension + 1u]); +} + +vec3 sampling_rng_3D_get(const eSamplingDimension dimension) +{ + return vec3(sampling_buf.dimensions[dimension], + sampling_buf.dimensions[dimension + 1u], + sampling_buf.dimensions[dimension + 2u]); +} + +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Random Number Generators. + * \{ */ + +/* Interlieved gradient noise by Jorge Jimenez + * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare + * Seeding found by Epic Game. */ +float interlieved_gradient_noise(vec2 pixel, float seed, float offset) +{ + pixel += seed * (vec2(47, 17) * 0.695); + return fract(offset + 52.9829189 * fract(0.06711056 * pixel.x + 0.00583715 * pixel.y)); +} + +/* From: http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html */ +float van_der_corput_radical_inverse(uint bits) +{ +#if 0 /* Reference */ + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); +#else + bits = bitfieldReverse(bits); +#endif + /* Same as dividing by 0x100000000. 
*/ + return float(bits) * 2.3283064365386963e-10; +} + +vec2 hammersley_2d(float i, float sample_count) +{ + vec2 rand; + rand.x = i / sample_count; + rand.y = van_der_corput_radical_inverse(uint(i)); + return rand; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Distribution mapping. + * + * Functions mapping input random numbers to sampling shapes (i.e: hemisphere). + * \{ */ + +/* Given 2 random number in [0..1] range, return a random unit disk sample. */ +vec2 sample_disk(vec2 noise) +{ + float angle = noise.x * M_2PI; + return vec2(cos(angle), sin(angle)) * sqrt(noise.y); +} + +/* This transform a 2d random sample (in [0..1] range) to a sample located on a cylinder of the + * same range. This is because the sampling functions expect such a random sample which is + * normally precomputed. */ +vec3 sample_cylinder(vec2 rand) +{ + float theta = rand.x; + float phi = (rand.y - 0.5) * M_2PI; + float cos_phi = cos(phi); + float sin_phi = sqrt(1.0 - sqr(cos_phi)) * sign(phi); + return vec3(theta, cos_phi, sin_phi); +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl index 7ddf941df7c..183aac1e546 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl @@ -6,10 +6,23 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) #pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl) #pragma BLENDER_REQUIRE(eevee_surf_lib.glsl) #pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +vec4 closure_to_rgba(Closure cl) +{ + vec4 out_color; + out_color.rgb = g_emission; + out_color.a = saturate(1.0 - avg(g_transmittance)); + + /* Reset for the next closure tree. 
*/ + closure_weights_reset(); + + return out_color; +} + /* From the paper "Hashed Alpha Testing" by Chris Wyman and Morgan McGuire. */ float hash(vec2 a) { @@ -61,8 +74,7 @@ void main() nodetree_surface(); - // float noise_offset = sampling_rng_1D_get(sampling_buf, SAMPLING_TRANSPARENCY); - float noise_offset = 0.5; + float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY); float random_threshold = hashed_alpha_threshold(1.0, noise_offset, g_data.P); float transparency = avg(g_transmittance); @@ -72,14 +84,7 @@ void main() #endif #ifdef MAT_VELOCITY - vec4 out_velocity_camera; /* TODO(fclem): Panoramic cameras. */ - velocity_camera(interp.P + motion.prev, - interp.P, - interp.P - motion.next, - out_velocity_camera, - out_velocity_view); - - /* For testing in viewport. */ - out_velocity_view.zw = vec2(0.0); + out_velocity = velocity_surface(interp.P + motion.prev, interp.P, interp.P + motion.next); + out_velocity = velocity_pack(out_velocity); #endif } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl index 143e88dbe68..ab29067763d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl @@ -5,35 +5,36 @@ * This is used by alpha blended materials and materials using Shader to RGB nodes. 
**/ -#pragma BLENDER_REQUIRE(common_view_lib.glsl) -#pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_eval_lib.glsl) #pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) #pragma BLENDER_REQUIRE(eevee_surf_lib.glsl) -float spec_light(ClosureReflection ref) -{ - float gloss = saturate(1.0 - ref.roughness); - float shininess = exp2(10.0 * gloss + 1.0); - vec3 N = ref.N; - vec3 L = vec3(0.0, 0.0, 1.0); - vec3 H = normalize(L + cameraVec(g_data.P)); - float spec_angle = saturate(dot(N, H)); - float normalization_factor = shininess * 0.125 + 1.0; - float spec_light = pow(spec_angle, shininess) * saturate(dot(N, L)) * normalization_factor; - return spec_light; -} - vec4 closure_to_rgba(Closure cl) { + vec3 diffuse_light = vec3(0.0); + vec3 reflection_light = vec3(0.0); + vec3 refraction_light = vec3(0.0); + + float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos); + + light_eval(g_diffuse_data, + g_reflection_data, + g_data.P, + cameraVec(g_data.P), + vP_z, + 0.01 /* TODO(fclem) thickness. 
*/, + diffuse_light, + reflection_light); + vec4 out_color; out_color.rgb = g_emission; - out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight * - saturate(g_diffuse_data.N.z * 0.5 + 0.5); - out_color.rgb += g_reflection_data.color * g_reflection_data.weight * - spec_light(g_reflection_data); - out_color.rgb += g_refraction_data.color * g_refraction_data.weight * - saturate(g_refraction_data.N.z * 0.5 + 0.5); + out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight * diffuse_light; + out_color.rgb += g_reflection_data.color * g_reflection_data.weight * reflection_light; + out_color.rgb += g_refraction_data.color * g_refraction_data.weight * refraction_light; out_color.a = saturate(1.0 - avg(g_transmittance)); @@ -47,27 +48,72 @@ void main() { init_globals(); + float noise = utility_tx_fetch(utility_tx, gl_FragCoord.xy, UTIL_BLUE_NOISE_LAYER).r; + g_closure_rand = fract(noise + sampling_rng_1D_get(SAMPLING_CLOSURE)); + fragment_displacement(); nodetree_surface(); g_holdout = saturate(g_holdout); + vec3 diffuse_light = vec3(0.0); + vec3 reflection_light = vec3(0.0); + vec3 refraction_light = vec3(0.0); + + float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos); + + light_eval(g_diffuse_data, + g_reflection_data, + g_data.P, + cameraVec(g_data.P), + vP_z, + 0.01 /* TODO(fclem) thickness. 
*/, + diffuse_light, + reflection_light); + + g_diffuse_data.color *= g_diffuse_data.weight; + g_reflection_data.color *= g_reflection_data.weight; + g_refraction_data.color *= g_refraction_data.weight; + diffuse_light *= step(1e-5, g_diffuse_data.weight); + reflection_light *= step(1e-5, g_reflection_data.weight); + refraction_light *= step(1e-5, g_refraction_data.weight); + out_radiance.rgb = g_emission; - out_radiance.rgb += g_diffuse_data.color * g_diffuse_data.weight * - saturate(g_diffuse_data.N.z * 0.5 + 0.5); - out_radiance.rgb += g_reflection_data.color * g_reflection_data.weight * - spec_light(g_reflection_data); - out_radiance.rgb += g_refraction_data.color * g_refraction_data.weight * - saturate(g_refraction_data.N.z * 0.5 + 0.5); + out_radiance.rgb += g_diffuse_data.color * diffuse_light; + out_radiance.rgb += g_reflection_data.color * reflection_light; + out_radiance.rgb += g_refraction_data.color * refraction_light; out_radiance.a = 0.0; + vec3 specular_light = reflection_light + refraction_light; + vec3 specular_color = g_reflection_data.color + g_refraction_data.color; + + /* TODO(fclem): This feels way too complex for what is it. */ + bool has_any_bsdf_weight = g_diffuse_data.weight != 0.0 || g_reflection_data.weight != 0.0 || + g_refraction_data.weight != 0.0; + vec3 out_normal = has_any_bsdf_weight ? 
vec3(0.0) : g_data.N; + out_normal += g_diffuse_data.N * g_diffuse_data.weight; + out_normal += g_reflection_data.N * g_reflection_data.weight; + out_normal += g_refraction_data.N * g_refraction_data.weight; + out_normal = safe_normalize(out_normal); + +#ifdef MAT_RENDER_PASS_SUPPORT + ivec2 out_texel = ivec2(gl_FragCoord.xy); + imageStore(rp_normal_img, out_texel, vec4(out_normal, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(diffuse_light, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(specular_light, 1.0)); + imageStore(rp_diffuse_color_img, out_texel, vec4(g_diffuse_data.color, 1.0)); + imageStore(rp_specular_color_img, out_texel, vec4(specular_color, 1.0)); + imageStore(rp_emission_img, out_texel, vec4(g_emission, 1.0)); + imageStore(rp_cryptomatte_img, + out_texel, + vec4(cryptomatte_object_buf[resource_id], node_tree.crypto_hash, 0.0)); +#endif + out_radiance.rgb *= 1.0 - g_holdout; out_transmittance.rgb = g_transmittance; out_transmittance.a = saturate(avg(g_transmittance)); - - /* Test */ - out_transmittance.a = 1.0 - out_transmittance.a; - out_radiance.a = 1.0 - out_radiance.a; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl index 30b48edaa78..6c1fc818f41 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl @@ -40,7 +40,7 @@ void init_globals_curves() /* Shade as a cylinder. */ float cos_theta = interp.curves_time_width / interp.curves_thickness; float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta)); - g_data.N = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta); + g_data.N = g_data.Ni = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta); /* Costly, but follows cycles per pixel tangent space (not following curve shape). 
*/ vec3 V = cameraVec(g_data.P); @@ -60,13 +60,14 @@ void init_globals_curves() void init_globals_gpencil() { /* Undo backface flip as the gpencil normal is already pointing towards the camera. */ - g_data.N = interp.N; + g_data.N = g_data.Ni = interp.N; } void init_globals() { /* Default values. */ g_data.P = interp.P; + g_data.Ni = interp.N; g_data.N = safe_normalize(interp.N); g_data.Ng = g_data.N; g_data.is_strand = false; @@ -81,6 +82,7 @@ void init_globals() #ifdef GPU_FRAGMENT_SHADER g_data.N = (FrontFacing) ? g_data.N : -g_data.N; + g_data.Ni = (FrontFacing) ? g_data.Ni : -g_data.Ni; g_data.Ng = safe_normalize(cross(dFdx(g_data.P), dFdy(g_data.P))); #endif diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl index ac657afc922..442c2579c84 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl @@ -24,6 +24,20 @@ void main() g_holdout = saturate(g_holdout); + ivec2 out_texel = ivec2(gl_FragCoord.xy); + imageStore(rp_normal_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(0.0, 0.0, 0.0, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_diffuse_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_specular_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_emission_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_cryptomatte_img, out_texel, vec4(0.0)); + out_background.rgb = safe_color(g_emission) * (1.0 - g_holdout); out_background.a = saturate(avg(g_transmittance)) * g_holdout; + + /* World opacity. 
*/ + out_background = mix(vec4(0.0, 0.0, 0.0, 1.0), out_background, world_opacity_fade); } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl index 435ae6658c9..8d02609fedc 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl @@ -2,23 +2,38 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(eevee_camera_lib.glsl) +vec4 velocity_pack(vec4 data) +{ + return data * 0.01; +} + +vec4 velocity_unpack(vec4 data) +{ + return data * 100.0; +} + #ifdef VELOCITY_CAMERA /** * Given a triple of position, compute the previous and next motion vectors. - * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy) + * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy). */ -vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next) +vec4 velocity_surface(vec3 P_prv, vec3 P, vec3 P_nxt) { - vec2 prev_uv, curr_uv, next_uv; - - prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy; - curr_uv = transform_point(ViewProjectionMatrix, P).xy; - next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy; - - vec4 motion; - motion.xy = prev_uv - curr_uv; - motion.zw = curr_uv - next_uv; + /* NOTE: We don't use the drw_view.persmat to avoid adding the TAA jitter to the velocity. */ + vec2 prev_uv = project_point(camera_prev.persmat, P_prv).xy; + vec2 curr_uv = project_point(camera_curr.persmat, P).xy; + vec2 next_uv = project_point(camera_next.persmat, P_nxt).xy; + /* Fix issue with perspective division. */ + if (any(isnan(prev_uv))) { + prev_uv = curr_uv; + } + if (any(isnan(next_uv))) { + next_uv = curr_uv; + } + /* NOTE: We output both vectors in the same direction so we can reuse the same vector + * with rgrg swizzle in viewport. 
*/ + vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv); /* Convert NDC velocity to UV velocity */ motion *= 0.5; @@ -26,37 +41,55 @@ vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next) } /** - * Given a triple of position, compute the previous and next motion vectors. - * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy) - * \a velocity_camera is the motion in film UV space after camera projection. - * \a velocity_view is the motion in ShadingView UV space. It is different - * from velocity_camera for multi-view rendering. + * Given a view space view vector \a vV, compute the previous and next motion vectors for + * background pixels. + * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy). */ -void velocity_camera(vec3 P_prev, vec3 P, vec3 P_next, out vec4 vel_camera, out vec4 vel_view) +vec4 velocity_background(vec3 vV) { - vec2 prev_uv, curr_uv, next_uv; - prev_uv = camera_uv_from_world(camera_prev, P_prev); - curr_uv = camera_uv_from_world(camera_curr, P); - next_uv = camera_uv_from_world(camera_next, P_next); - - vel_camera.xy = prev_uv - curr_uv; - vel_camera.zw = curr_uv - next_uv; + /* Only transform direction to avoid losing precision. */ + vec3 V = transform_direction(camera_curr.viewinv, vV); + /* NOTE: We don't use the drw_view.winmat to avoid adding the TAA jitter to the velocity. */ + vec2 prev_uv = project_point(camera_prev.winmat, V).xy; + vec2 curr_uv = project_point(camera_curr.winmat, V).xy; + vec2 next_uv = project_point(camera_next.winmat, V).xy; + /* NOTE: We output both vectors in the same direction so we can reuse the same vector + * with rgrg swizzle in viewport. */ + vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv); + /* Convert NDC velocity to UV velocity */ + motion *= 0.5; - if (is_panoramic(camera_curr.type)) { - /* This path is only used if using using panoramic projections. 
Since the views always have - * the same 45° aperture angle, we can safely reuse the projection matrix. */ - prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy; - curr_uv = transform_point(ViewProjectionMatrix, P).xy; - next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy; + return motion; +} - vel_view.xy = prev_uv - curr_uv; - vel_view.zw = curr_uv - next_uv; - /* Convert NDC velocity to UV velocity */ - vel_view *= 0.5; - } - else { - vel_view = vel_camera; +vec4 velocity_resolve(vec4 vector, vec2 uv, float depth) +{ + if (vector.x == VELOCITY_INVALID) { + bool is_background = (depth == 1.0); + if (is_background) { + /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */ + vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0)); + return velocity_background(vV); + } + else { + /* Static geometry. No translation in world space. */ + vec3 P = get_world_space_from_depth(uv, depth); + return velocity_surface(P, P, P); + } } + return velocity_unpack(vector); +} + +/** + * Load and resolve correct velocity as some pixels might still not have correct + * motion data for performance reasons. + * Returns motion vector in render UV space. + */ +vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth) +{ + vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(vector_tx, 0).xy); + vec4 vector = texelFetch(vector_tx, texel, 0); + return velocity_resolve(vector, uv, depth); } #endif diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl deleted file mode 100644 index b68b2eaf117..00000000000 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl +++ /dev/null @@ -1,58 +0,0 @@ - -/** - * Fullscreen pass that compute motion vector for static geometry. 
- * Animated geometry has already written correct motion vectors. - */ - -#pragma BLENDER_REQUIRE(common_view_lib.glsl) -#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) - -#define is_valid_output(img_) (imageSize(img_).x > 1) - -void main() -{ - ivec2 texel = ivec2(gl_GlobalInvocationID.xy); - vec4 motion = imageLoad(velocity_view_img, texel); - - bool pixel_has_valid_motion = (motion.x != VELOCITY_INVALID); - float depth = texelFetch(depth_tx, texel, 0).r; - bool is_background = (depth == 1.0f); - - vec2 uv = vec2(texel) * drw_view.viewport_size_inverse; - vec3 P_next, P_prev, P_curr; - - if (pixel_has_valid_motion) { - /* Animated geometry. View motion already computed during prepass. Convert only to camera. */ - // P_prev = get_world_space_from_depth(uv + motion.xy, 0.5); - // P_curr = get_world_space_from_depth(uv, 0.5); - // P_next = get_world_space_from_depth(uv + motion.zw, 0.5); - return; - } - else if (is_background) { - /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */ - vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0)); - vec3 V = transform_direction(ViewMatrixInverse, vV); - /* Background has no motion under camera translation. Translate view vector with the camera. */ - /* WATCH(fclem): Might create precision issues. */ - P_next = camera_next.viewinv[3].xyz + V; - P_curr = camera_curr.viewinv[3].xyz + V; - P_prev = camera_prev.viewinv[3].xyz + V; - } - else { - /* Static geometry. No translation in world space. 
*/ - P_curr = get_world_space_from_depth(uv, depth); - P_prev = P_curr; - P_next = P_curr; - } - - vec4 vel_camera, vel_view; - velocity_camera(P_prev, P_curr, P_next, vel_camera, vel_view); - - if (in_texture_range(texel, depth_tx)) { - imageStore(velocity_view_img, texel, vel_view); - - if (is_valid_output(velocity_camera_img)) { - imageStore(velocity_camera_img, texel, vel_camera); - } - } -} diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh new file mode 100644 index 00000000000..b689a7f53a2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Setup + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut) + .do_static_compilation(true) + .local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img") + .image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img") + .compute_source("eevee_depth_of_field_bokeh_lut_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, 
ImageType::FLOAT_2D, "out_coc_img") + .compute_source("eevee_depth_of_field_setup_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize) + .do_static_compilation(true) + .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "coc_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + .sampler(2, ImageType::FLOAT_2D, "velocity_tx") + .sampler(3, ImageType::FLOAT_2D, "in_history_tx") + .sampler(4, ImageType::DEPTH_2D, "depth_tx") + .push_constant(Type::BOOL, "use_history") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img") + .compute_source("eevee_depth_of_field_stabilize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "coc_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_downsample_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce) + .do_static_compilation(true) + .local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "downsample_tx") + .storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]") + .storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "scatter_fg_indirect_buf") + .storage_buf(3, Qualifier::READ_WRITE, 
"DrawCommand", "scatter_bg_indirect_buf") + .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "inout_color_lod0_img") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod1_img") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod2_img") + .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod3_img") + .image(4, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D, "in_coc_lod0_img") + .image(5, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod1_img") + .image(6, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod2_img") + .image(7, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod3_img") + .compute_source("eevee_depth_of_field_reduce_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle-Of-Confusion Tiles + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_flatten) + .do_static_compilation(true) + .local_group_size(DOF_TILES_FLATTEN_GROUP_SIZE, DOF_TILES_FLATTEN_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .compute_source("eevee_depth_of_field_tiles_flatten_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate) + .additional_info("eevee_shared", "draw_view", "eevee_depth_of_field_tiles_common") + .local_group_size(DOF_TILES_DILATE_GROUP_SIZE, DOF_TILES_DILATE_GROUP_SIZE) + .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .push_constant(Type::INT, "ring_count") + .push_constant(Type::INT, "ring_width_multiplier") + .compute_source("eevee_depth_of_field_tiles_dilate_comp.glsl"); + 
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minabs) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "false") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minmax) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "true") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_common) + .image(0, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_fg_img") + .image(1, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_bg_img"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Variations + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_no_lut) + .define("DOF_BOKEH_TEXTURE", "false") + /** + * WORKAROUND(@fclem): This is to keep the code as is for now. The bokeh_lut_tx is referenced + * even if not used after optimization. But we don't want to include it in the create infos. + */ + .define("bokeh_lut_tx", "color_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lut) + .define("DOF_BOKEH_TEXTURE", "true") + .sampler(5, ImageType::FLOAT_2D, "bokeh_lut_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_background).define("DOF_FOREGROUND_PASS", "false"); +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_foreground).define("DOF_FOREGROUND_PASS", "true"); + +#define EEVEE_DOF_FINAL_VARIATION(name, ...) \ + GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true); + +#define EEVEE_DOF_LUT_VARIATIONS(prefix, ...) \ + EEVEE_DOF_FINAL_VARIATION(prefix##_lut, "eevee_depth_of_field_lut", __VA_ARGS__) \ + EEVEE_DOF_FINAL_VARIATION(prefix##_no_lut, "eevee_depth_of_field_no_lut", __VA_ARGS__) + +#define EEVEE_DOF_GROUND_VARIATIONS(name, ...) 
\ + EEVEE_DOF_LUT_VARIATIONS(name##_background, "eevee_depth_of_field_background", __VA_ARGS__) \ + EEVEE_DOF_LUT_VARIATIONS(name##_foreground, "eevee_depth_of_field_foreground", __VA_ARGS__) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE) + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx") + .sampler(2, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(3, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather) + .image(4, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_occlusion_img") + .compute_source("eevee_depth_of_field_gather_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common"); + +EEVEE_DOF_GROUND_VARIATIONS(eevee_depth_of_field_gather, "eevee_depth_of_field_gather") + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hole_fill) + .do_static_compilation(true) + .compute_source("eevee_depth_of_field_hole_fill_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common", "eevee_depth_of_field_no_lut"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_filter) + .do_static_compilation(true) + .local_group_size(DOF_FILTER_GROUP_SIZE, DOF_FILTER_GROUP_SIZE) + .additional_info("eevee_shared") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "weight_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img") + .compute_source("eevee_depth_of_field_filter_comp.glsl"); 
+ +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Scatter + * \{ */ + +GPU_SHADER_INTERFACE_INFO(eevee_depth_of_field_scatter_iface, "interp") + /** Colors, weights, and Circle of confusion radii for the 4 pixels to scatter. */ + .flat(Type::VEC4, "color_and_coc1") + .flat(Type::VEC4, "color_and_coc2") + .flat(Type::VEC4, "color_and_coc3") + .flat(Type::VEC4, "color_and_coc4") + /** Sprite pixel position with origin at sprite center. In pixels. */ + .no_perspective(Type::VEC2, "rect_uv1") + .no_perspective(Type::VEC2, "rect_uv2") + .no_perspective(Type::VEC2, "rect_uv3") + .no_perspective(Type::VEC2, "rect_uv4") + /** Scaling factor for the bokeh distance. */ + .flat(Type::FLOAT, "distance_scale"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_scatter) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "occlusion_tx") + .sampler(1, ImageType::FLOAT_2D, "bokeh_lut_tx") + .storage_buf(0, Qualifier::READ, "ScatterRect", "scatter_list_buf[]") + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::BOOL, "use_bokeh_lut") + .vertex_out(eevee_depth_of_field_scatter_iface) + .vertex_source("eevee_depth_of_field_scatter_vert.glsl") + .fragment_source("eevee_depth_of_field_scatter_frag.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resolve + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve) + .define("DOF_RESOLVE_PASS", "true") + .local_group_size(DOF_RESOLVE_GROUP_SIZE, DOF_RESOLVE_GROUP_SIZE) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + .sampler(2, ImageType::FLOAT_2D, "color_bg_tx") + .sampler(3, ImageType::FLOAT_2D, "color_fg_tx") + .sampler(4, 
ImageType::FLOAT_2D, "color_hole_fill_tx") + .sampler(7, ImageType::FLOAT_2D, "weight_bg_tx") + .sampler(8, ImageType::FLOAT_2D, "weight_fg_tx") + .sampler(9, ImageType::FLOAT_2D, "weight_hole_fill_tx") + .sampler(10, ImageType::FLOAT_2D, "stable_color_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_resolve_comp.glsl"); + +EEVEE_DOF_LUT_VARIATIONS(eevee_depth_of_field_resolve, "eevee_depth_of_field_resolve") + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh new file mode 100644 index 00000000000..4541f14d96c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_film) + .uniform_buf(6, "FilmData", "film_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "combined_tx") + .sampler(2, ImageType::FLOAT_2D, "normal_tx") + .sampler(3, ImageType::FLOAT_2D, "vector_tx") + .sampler(4, ImageType::FLOAT_2D_ARRAY, "light_tx") + .sampler(5, ImageType::FLOAT_2D, "diffuse_color_tx") + .sampler(6, ImageType::FLOAT_2D, "specular_color_tx") + .sampler(7, ImageType::FLOAT_2D, "volume_light_tx") + .sampler(8, ImageType::FLOAT_2D, "emission_tx") + .sampler(9, ImageType::FLOAT_2D, "environment_tx") + .sampler(10, ImageType::FLOAT_2D, "shadow_tx") + .sampler(11, ImageType::FLOAT_2D, "ambient_occlusion_tx") + .sampler(12, ImageType::FLOAT_2D_ARRAY, "aov_color_tx") + .sampler(13, ImageType::FLOAT_2D_ARRAY, "aov_value_tx") + /* Color History for TAA needs to be sampler to leverage bilinear sampling. 
*/ + .sampler(14, ImageType::FLOAT_2D, "in_combined_tx") + .sampler(15, ImageType::FLOAT_2D, "cryptomatte_tx") + .image(0, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "in_weight_img") + .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D_ARRAY, "out_weight_img") + /* Color History for TAA needs to be sampler to leverage bilinear sampling. */ + //.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img") + .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img") + .image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img") + .image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img") + .image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img") + .image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img") + .additional_info("eevee_shared") + .additional_info("eevee_velocity_camera") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(eevee_film_frag) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_color") + .fragment_source("eevee_film_frag.glsl") + .additional_info("draw_fullscreen", "eevee_film"); + +GPU_SHADER_CREATE_INFO(eevee_film_comp) + .do_static_compilation(true) + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .compute_source("eevee_film_comp.glsl") + .additional_info("eevee_film"); + +GPU_SHADER_CREATE_INFO(eevee_film_cryptomatte_post) + .do_static_compilation(true) + .image(0, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img") + .image(1, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "weight_img") + .push_constant(Type::INT, "cryptomatte_layer_len") + .push_constant(Type::INT, "cryptomatte_samples_per_layer") + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .compute_source("eevee_film_cryptomatte_post_comp.glsl") + .additional_info("eevee_shared"); diff --git 
a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh new file mode 100644 index 00000000000..5e32631a8f8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_hiz_data) + .sampler(15, ImageType::FLOAT_2D, "hiz_tx") + .uniform_buf(5, "HiZData", "hiz_buf"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_update) + .do_static_compilation(true) + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "uint", "finished_tile_counter") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_0") + .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_1") + .image(2, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_2") + .image(3, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_3") + .image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4") + .image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5") + .image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6") + .image(7, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_7") + .push_constant(Type::BOOL, "update_mip_0") + .compute_source("eevee_hiz_update_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_debug) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) + .fragment_source("eevee_hiz_debug_frag.glsl") + .additional_info("eevee_shared", "eevee_hiz_data", "draw_fullscreen"); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh 
b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh new file mode 100644 index 00000000000..41602426a1d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Shared + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_data) + .storage_buf(LIGHT_CULL_BUF_SLOT, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(LIGHT_BUF_SLOT, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(LIGHT_ZBIN_BUF_SLOT, Qualifier::READ, "uint", "light_zbin_buf[]") + .storage_buf(LIGHT_TILE_BUF_SLOT, Qualifier::READ, "uint", "light_tile_buf[]"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Culling + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_select) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_SELECT_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::WRITE, "float", "out_zdist_buf[]") + .storage_buf(4, Qualifier::WRITE, "uint", "out_key_buf[]") + .compute_source("eevee_light_culling_select_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_sort) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::READ, "float", "in_zdist_buf[]") + .storage_buf(4, Qualifier::READ, "uint", 
"in_key_buf[]") + .local_group_size(CULLING_SORT_GROUP_SIZE) + .compute_source("eevee_light_culling_sort_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_zbin) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_ZBIN_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_zbin_buf[]") + .compute_source("eevee_light_culling_zbin_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_tile) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_TILE_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_light_tile_buf[]") + .compute_source("eevee_light_culling_tile_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_debug) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) + .fragment_source("eevee_light_culling_debug_frag.glsl") + .additional_info( + "eevee_shared", "draw_view", "draw_fullscreen", "eevee_light_data", "eevee_hiz_data"); + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh index a944bea402e..78d52d4b90e 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "eevee_defines.hh" #include 
"gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -12,8 +13,12 @@ GPU_SHADER_CREATE_INFO(eevee_shared) .typedef_source("eevee_shader_shared.hh"); GPU_SHADER_CREATE_INFO(eevee_sampling_data) + .define("EEVEE_SAMPLING_DATA") .additional_info("eevee_shared") - .uniform_buf(14, "SamplingData", "sampling_buf"); + .storage_buf(6, Qualifier::READ, "SamplingData", "sampling_buf"); + +GPU_SHADER_CREATE_INFO(eevee_utility_texture) + .sampler(RBUFS_UTILITY_TEX_SLOT, ImageType::FLOAT_2D_ARRAY, "utility_tx"); /** \} */ @@ -27,7 +32,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_mesh) .vertex_in(0, Type::VEC3, "pos") .vertex_in(1, Type::VEC3, "nor") .vertex_source("eevee_geom_mesh_vert.glsl") - .additional_info("draw_mesh", "draw_resource_id_varying", "draw_resource_handle"); + .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view"); GPU_SHADER_CREATE_INFO(eevee_geom_gpencil) .additional_info("eevee_shared") @@ -49,7 +54,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_world) .define("MAT_GEOM_WORLD") .builtins(BuiltinBits::VERTEX_ID) .vertex_source("eevee_geom_world_vert.glsl") - .additional_info("draw_modelmat", "draw_resource_id_varying", "draw_resource_handle"); + .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view"); /** \} */ @@ -70,6 +75,26 @@ GPU_SHADER_INTERFACE_INFO(eevee_surf_iface, "interp") #define image_out(slot, qualifier, format, name) \ image(slot, format, qualifier, ImageType::FLOAT_2D, name, Frequency::PASS) +#define image_array_out(slot, qualifier, format, name) \ + image(slot, format, qualifier, ImageType::FLOAT_2D_ARRAY, name, Frequency::PASS) + +GPU_SHADER_CREATE_INFO(eevee_aov_out) + .define("MAT_AOV_SUPPORT") + .image_array_out(RBUFS_AOV_COLOR_SLOT, Qualifier::WRITE, GPU_RGBA16F, "aov_color_img") + .image_array_out(RBUFS_AOV_VALUE_SLOT, Qualifier::WRITE, GPU_R16F, "aov_value_img") + .storage_buf(RBUFS_AOV_BUF_SLOT, Qualifier::READ, "AOVsInfoData", "aov_buf"); + 
+GPU_SHADER_CREATE_INFO(eevee_render_pass_out) + .define("MAT_RENDER_PASS_SUPPORT") + .image_out(RBUFS_NORMAL_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img") + .image_array_out(RBUFS_LIGHT_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img") + .image_out(RBUFS_DIFF_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img") + .image_out(RBUFS_SPEC_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img") + .image_out(RBUFS_EMISSION_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img"); + +GPU_SHADER_CREATE_INFO(eevee_cryptomatte_out) + .storage_buf(7, Qualifier::READ, "vec2", "cryptomatte_object_buf[]", Frequency::PASS) + .image_out(7, Qualifier::WRITE, GPU_RGBA32F, "rp_cryptomatte_img"); GPU_SHADER_CREATE_INFO(eevee_surf_deferred) .vertex_out(eevee_surf_iface) @@ -85,44 +110,51 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred) // .image_out(3, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_reflection_color") // .image_out(4, Qualifier::WRITE, GPU_RGBA16F, "gbuff_reflection_normal") // .image_out(5, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_emission") - /* Renderpasses. */ + /* Render-passes. */ // .image_out(6, Qualifier::READ_WRITE, GPU_RGBA16F, "rpass_volume_light") /* TODO: AOVs maybe? */ .fragment_source("eevee_surf_deferred_frag.glsl") - // .additional_info("eevee_sampling_data", "eevee_utility_texture") + // .additional_info("eevee_aov_out", "eevee_sampling_data", "eevee_utility_texture") ; -#undef image_out - GPU_SHADER_CREATE_INFO(eevee_surf_forward) - .auto_resource_location(true) .vertex_out(eevee_surf_iface) + /* Early fragment test is needed for render passes support for forward surfaces. */ + /* NOTE: This removes the possibility of using gl_FragDepth. 
*/ + .early_fragment_test(true) .fragment_out(0, Type::VEC4, "out_radiance", DualBlend::SRC_0) .fragment_out(0, Type::VEC4, "out_transmittance", DualBlend::SRC_1) .fragment_source("eevee_surf_forward_frag.glsl") - // .additional_info("eevee_sampling_data", - // "eevee_lightprobe_data", - /* Optionally added depending on the material. */ - // "eevee_raytrace_data", - // "eevee_transmittance_data", - // "eevee_utility_texture", - // "eevee_light_data", - // "eevee_shadow_data" - // ) - ; + .additional_info("eevee_cryptomatte_out", + "eevee_light_data", + "eevee_utility_texture", + "eevee_sampling_data" + // "eevee_lightprobe_data", + // "eevee_shadow_data" + /* Optionally added depending on the material. */ + // "eevee_raytrace_data", + // "eevee_transmittance_data", + // "eevee_aov_out", + // "eevee_render_pass_out", + ); GPU_SHADER_CREATE_INFO(eevee_surf_depth) .vertex_out(eevee_surf_iface) .fragment_source("eevee_surf_depth_frag.glsl") - // .additional_info("eevee_sampling_data", "eevee_utility_texture") - ; + .additional_info("eevee_sampling_data", "eevee_utility_texture"); GPU_SHADER_CREATE_INFO(eevee_surf_world) .vertex_out(eevee_surf_iface) + .push_constant(Type::FLOAT, "world_opacity_fade") .fragment_out(0, Type::VEC4, "out_background") .fragment_source("eevee_surf_world_frag.glsl") - // .additional_info("eevee_utility_texture") - ; + .additional_info("eevee_aov_out", + "eevee_cryptomatte_out", + "eevee_render_pass_out", + "eevee_utility_texture"); + +#undef image_out +#undef image_array_out /** \} */ @@ -161,10 +193,7 @@ GPU_SHADER_CREATE_INFO(eevee_volume_deferred) GPU_SHADER_CREATE_INFO(eevee_material_stub).define("EEVEE_MATERIAL_STUBS"); # define EEVEE_MAT_FINAL_VARIATION(name, ...) \ - GPU_SHADER_CREATE_INFO(name) \ - .additional_info(__VA_ARGS__) \ - .auto_resource_location(true) \ - .do_static_compilation(true); + GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true); # define EEVEE_MAT_GEOM_VARIATIONS(prefix, ...) 
\ EEVEE_MAT_FINAL_VARIATION(prefix##_world, "eevee_geom_world", __VA_ARGS__) \ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh new file mode 100644 index 00000000000..ec302ec6770 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera") + .uniform_buf(6, "MotionBlurData", "motion_blur_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_img") + .compute_source("eevee_motion_blur_flatten_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_viewport) + .do_static_compilation(true) + .define("FLATTEN_VIEWPORT") + .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img") + .additional_info("eevee_motion_blur_tiles_flatten"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_render) + .do_static_compilation(true) + .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img") + .additional_info("eevee_motion_blur_tiles_flatten"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_dilate) + .do_static_compilation(true) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared") + /* NOTE: See MotionBlurTileIndirection. 
*/ + .storage_buf(0, Qualifier::READ_WRITE, "uint", "tile_indirection_buf[]") + .image(1, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img") + .compute_source("eevee_motion_blur_dilate_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_gather) + .do_static_compilation(true) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_sampling_data") + .uniform_buf(6, "MotionBlurData", "motion_blur_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "velocity_tx") + .sampler(2, ImageType::FLOAT_2D, "in_color_tx") + /* NOTE: See MotionBlurTileIndirection. */ + .storage_buf(0, Qualifier::READ, "uint", "tile_indirection_buf[]") + .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_motion_blur_gather_comp.glsl"); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh index a5f16363466..0a1c2721c61 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh @@ -1,4 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "eevee_defines.hh" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -16,40 +18,22 @@ GPU_SHADER_INTERFACE_INFO(eevee_velocity_surface_iface, "motion") GPU_SHADER_CREATE_INFO(eevee_velocity_camera) .define("VELOCITY_CAMERA") - .uniform_buf(1, "CameraData", "camera_prev") - .uniform_buf(2, "CameraData", "camera_curr") - .uniform_buf(3, "CameraData", "camera_next"); + .uniform_buf(VELOCITY_CAMERA_PREV_BUF, "CameraData", "camera_prev") + .uniform_buf(VELOCITY_CAMERA_CURR_BUF, "CameraData", "camera_curr") + 
.uniform_buf(VELOCITY_CAMERA_NEXT_BUF, "CameraData", "camera_next"); GPU_SHADER_CREATE_INFO(eevee_velocity_geom) .define("MAT_VELOCITY") - .auto_resource_location(true) - .storage_buf(4, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]", Frequency::PASS) - .storage_buf(5, Qualifier::READ, "mat4", "velocity_obj_next_buf[]", Frequency::PASS) - .storage_buf(6, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]", Frequency::PASS) - .storage_buf(7, Qualifier::READ, "vec4", "velocity_geo_next_buf[]", Frequency::PASS) - .storage_buf( - 7, Qualifier::READ, "VelocityIndex", "velocity_indirection_buf[]", Frequency::PASS) + .storage_buf(VELOCITY_OBJ_PREV_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]") + .storage_buf(VELOCITY_OBJ_NEXT_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_next_buf[]") + .storage_buf(VELOCITY_GEO_PREV_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]") + .storage_buf(VELOCITY_GEO_NEXT_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_next_buf[]") + .storage_buf(VELOCITY_INDIRECTION_BUF_SLOT, + Qualifier::READ, + "VelocityIndex", + "velocity_indirection_buf[]") .vertex_out(eevee_velocity_surface_iface) - .fragment_out(0, Type::VEC4, "out_velocity_view") + .fragment_out(0, Type::VEC4, "out_velocity") .additional_info("eevee_velocity_camera"); /** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Velocity Resolve - * - * Computes velocity for static objects. - * Also converts motion to camera space (as opposed to view space) if needed. 
- * \{ */ - -GPU_SHADER_CREATE_INFO(eevee_velocity_resolve) - .do_static_compilation(true) - .local_group_size(8, 8) - .sampler(0, ImageType::DEPTH_2D, "depth_tx") - .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_view_img") - .image(1, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "velocity_camera_img") - .additional_info("eevee_shared") - .compute_source("eevee_velocity_resolve_comp.glsl") - .additional_info("draw_view", "eevee_velocity_camera"); - -/** \} */ diff --git a/source/blender/draw/engines/external/external_engine.c b/source/blender/draw/engines/external/external_engine.c index b9c09e2bc4f..3f047d8de68 100644 --- a/source/blender/draw/engines/external/external_engine.c +++ b/source/blender/draw/engines/external/external_engine.c @@ -236,7 +236,11 @@ static void external_draw_scene_do_v3d(void *vedata) RegionView3D *rv3d = draw_ctx->rv3d; ARegion *region = draw_ctx->region; - DRW_state_reset_ex(DRW_STATE_DEFAULT & ~DRW_STATE_DEPTH_LESS_EQUAL); + DRW_state_reset_ex(DRW_STATE_WRITE_COLOR); + + /* The external engine can use the OpenGL rendering API directly, so make sure the state is + * already applied. */ + GPU_apply_state(); /* Create render engine. */ if (!rv3d->render_engine) { @@ -332,6 +336,12 @@ static void external_draw_scene_do_image(void *UNUSED(vedata)) BLI_assert(re != NULL); BLI_assert(engine != NULL); + DRW_state_reset_ex(DRW_STATE_WRITE_COLOR); + + /* The external engine can use the OpenGL rendering API directly, so make sure the state is + * already applied. 
*/ + GPU_apply_state(); + const DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); /* Clear the depth buffer to the value used by the background overlay so that the overlay is not diff --git a/source/blender/draw/engines/gpencil/gpencil_draw_data.c b/source/blender/draw/engines/gpencil/gpencil_draw_data.c index 65ddb80ad55..e54ac99a888 100644 --- a/source/blender/draw/engines/gpencil/gpencil_draw_data.c +++ b/source/blender/draw/engines/gpencil/gpencil_draw_data.c @@ -460,7 +460,7 @@ GPENCIL_ViewLayerData *GPENCIL_view_layer_data_ensure(void) GPENCIL_ViewLayerData **vldata = (GPENCIL_ViewLayerData **)DRW_view_layer_engine_data_ensure( &draw_engine_gpencil_type, gpencil_view_layer_data_free); - /* NOTE(&fclem): Putting this stuff in viewlayer means it is shared by all viewports. + /* NOTE(@fclem): Putting this stuff in view-layer means it is shared by all viewports. * For now it is ok, but in the future, it could become a problem if we implement * the caching system. */ if (*vldata == NULL) { diff --git a/source/blender/draw/engines/gpencil/gpencil_engine.c b/source/blender/draw/engines/gpencil/gpencil_engine.c index 4f520e61936..42c396a0d43 100644 --- a/source/blender/draw/engines/gpencil/gpencil_engine.c +++ b/source/blender/draw/engines/gpencil/gpencil_engine.c @@ -799,7 +799,7 @@ static void gpencil_draw_mask(GPENCIL_Data *vedata, GPENCIL_tObject *ob, GPENCIL } GPENCIL_tLayer *mask_layer = gpencil_layer_cache_get(ob, i); - /* When filtering by viewlayer, the mask could be null and must be ignored. */ + /* When filtering by view-layer, the mask could be null and must be ignored. 
*/ if (mask_layer == NULL) { continue; } diff --git a/source/blender/draw/engines/gpencil/gpencil_engine.h b/source/blender/draw/engines/gpencil/gpencil_engine.h index 332c7f67c64..2f9d20b3902 100644 --- a/source/blender/draw/engines/gpencil/gpencil_engine.h +++ b/source/blender/draw/engines/gpencil/gpencil_engine.h @@ -19,6 +19,8 @@ extern "C" { #endif +#define GP_LIGHT + #include "gpencil_defines.h" #include "gpencil_shader_shared.h" diff --git a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h index 50ff7e7efc7..4c621e955b9 100644 --- a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h +++ b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h @@ -7,7 +7,9 @@ typedef struct gpMaterial gpMaterial; typedef struct gpLight gpLight; typedef enum gpMaterialFlag gpMaterialFlag; +# ifdef GP_LIGHT typedef enum gpLightType gpLightType; +# endif # endif #endif @@ -75,8 +77,9 @@ struct gpMaterial { }; BLI_STATIC_ASSERT_ALIGN(gpMaterial, 16) +#ifdef GP_LIGHT struct gpLight { -#ifndef GPU_SHADER +# ifndef GPU_SHADER float3 color; gpLightType type; float3 right; @@ -87,7 +90,7 @@ struct gpLight { float _pad0; float3 position; float _pad1; -#else +# else /* Some drivers are completely messing the alignment or the fetches here. * We are forced to pack these into vec4 otherwise we only get 0.0 as value. */ /* NOTE(@fclem): This was the case on MacOS OpenGL implementation. 
@@ -97,17 +100,18 @@ struct gpLight { float4 packed2; float4 packed3; float4 packed4; -# define _color packed0.xyz -# define _type packed0.w -# define _right packed1.xyz -# define _spot_size packed1.w -# define _up packed2.xyz -# define _spot_blend packed2.w -# define _forward packed3.xyz -# define _position packed4.xyz -#endif +# define _color packed0.xyz +# define _type packed0.w +# define _right packed1.xyz +# define _spot_size packed1.w +# define _up packed2.xyz +# define _spot_blend packed2.w +# define _forward packed3.xyz +# define _position packed4.xyz +# endif }; BLI_STATIC_ASSERT_ALIGN(gpLight, 16) +#endif #ifndef GPU_SHADER # undef gpMaterialFlag diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl index 75bd3d30d68..6671c16aa0b 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl @@ -344,7 +344,7 @@ float stroke_thickness_modulate(float thickness) } else { /* World space point size. 
*/ - thickness *= thicknessWorldScale * ProjectionMatrix[1][1] * sizeViewport.y; + thickness *= thicknessWorldScale * drw_view.winmat[1][1] * sizeViewport.y; } return thickness; } diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl index e162c5bf45e..2fca8b69183 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl @@ -5,5 +5,5 @@ void main() int v = gl_VertexID % 3; float x = -1.0 + float((v & 1) << 2); float y = -1.0 + float((v & 2) << 1); - gl_Position = ViewProjectionMatrix * (model_matrix * vec4(x, y, 0.0, 1.0)); + gl_Position = drw_view.persmat * (model_matrix * vec4(x, y, 0.0, 1.0)); } diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl index af8aec85598..b0ee059cb9d 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl @@ -32,7 +32,7 @@ void main() vec3 vert_N; gpMaterial gp_mat = materials[ma1.x + gpMaterialOffset]; - gpMaterialFlag gp_flag = floatBitsToInt(gp_mat._flag); + gpMaterialFlag gp_flag = floatBitsToUint(gp_mat._flag); gl_Position = gpencil_vertex(ma, ma1, @@ -125,7 +125,7 @@ void main() gpencil_color_output(fill_col, fcol_decode, 1.0, gp_mat._fill_texture_mix); gp_interp.mat_flag = gp_flag & GP_FILL_FLAGS; - gp_interp.mat_flag |= uint(ma1.x) << GPENCIl_MATID_SHIFT; + gp_interp.mat_flag |= uint(ma1.x + gpMaterialOffset) << GPENCIl_MATID_SHIFT; gp_interp.uv = mat2(gp_mat.fill_uv_rot_scale.xy, gp_mat.fill_uv_rot_scale.zw) * uv1.xy + gp_mat._fill_uv_offset; diff --git a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh index 3b4de704c00..1db98d13c4a 100644 --- 
a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh +++ b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh @@ -20,8 +20,8 @@ GPU_SHADER_INTERFACE_INFO(gpencil_geometry_iface, "gp_interp") GPU_SHADER_CREATE_INFO(gpencil_geometry) .do_static_compilation(true) + .define("GP_LIGHT") .typedef_source("gpencil_defines.h") - .typedef_source("gpencil_shader_shared.h") .sampler(0, ImageType::FLOAT_2D, "gpFillTexture") .sampler(1, ImageType::FLOAT_2D, "gpStrokeTexture") .sampler(2, ImageType::DEPTH_2D, "gpSceneDepthTexture") diff --git a/source/blender/draw/engines/overlay/overlay_antialiasing.c b/source/blender/draw/engines/overlay/overlay_antialiasing.c index 27ee479cf36..780915b7fc4 100644 --- a/source/blender/draw/engines/overlay/overlay_antialiasing.c +++ b/source/blender/draw/engines/overlay/overlay_antialiasing.c @@ -52,7 +52,7 @@ void OVERLAY_antialiasing_init(OVERLAY_Data *vedata) OVERLAY_PrivateData *pd = vedata->stl->pd; DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); - /* Small texture which will have very small impact on rendertime. */ + /* Small texture which will have very small impact on render-time. 
*/ if (txl->dummy_depth_tx == NULL) { const float pixel[1] = {1.0f}; txl->dummy_depth_tx = DRW_texture_create_2d(1, 1, GPU_DEPTH_COMPONENT24, 0, pixel); diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c index ea0c2f287a6..df5ee6a18c0 100644 --- a/source/blender/draw/engines/overlay/overlay_armature.c +++ b/source/blender/draw/engines/overlay/overlay_armature.c @@ -2102,7 +2102,7 @@ static void pchan_culling_calc_bsphere(const Object *ob, { float min[3], max[3]; INIT_MINMAX(min, max); - BKE_pchan_minmax(ob, pchan, min, max); + BKE_pchan_minmax(ob, pchan, true, min, max); mid_v3_v3v3(r_bsphere->center, min, max); r_bsphere->radius = len_v3v3(min, r_bsphere->center); } @@ -2220,7 +2220,7 @@ static void draw_armature_edit(ArmatureDrawContext *ctx) const bool show_text = DRW_state_show_text(); const Object *ob_orig = DEG_get_original_object(ob); - /* FIXME(campbell): We should be able to use the CoW object, + /* FIXME(@campbellbarton): We should be able to use the CoW object, * however the active bone isn't updated. Long term solution is an 'EditArmature' struct. * for now we can draw from the original armature. See: T66773. */ // bArmature *arm = ob->data; diff --git a/source/blender/draw/engines/overlay/overlay_edit_text.c b/source/blender/draw/engines/overlay/overlay_edit_text.c index dfef5b3c241..bd8720042f1 100644 --- a/source/blender/draw/engines/overlay/overlay_edit_text.c +++ b/source/blender/draw/engines/overlay/overlay_edit_text.c @@ -7,6 +7,8 @@ #include "DRW_render.h" +#include "UI_resources.h" + #include "BKE_vfont.h" #include "DNA_curve_types.h" @@ -38,17 +40,24 @@ void OVERLAY_edit_text_cache_init(OVERLAY_Data *vedata) DRW_shgroup_uniform_vec4_copy(grp, "color", G_draw.block.color_wire); } { + /* Cursor (text caret). 
*/ state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA; - DRW_PASS_CREATE(psl->edit_text_overlay_ps, state | pd->clipping_state); - + DRW_PASS_CREATE(psl->edit_text_cursor_ps, state | pd->clipping_state); sh = OVERLAY_shader_uniform_color(); - pd->edit_text_overlay_grp = grp = DRW_shgroup_create(sh, psl->edit_text_overlay_ps); + pd->edit_text_cursor_grp = grp = DRW_shgroup_create(sh, psl->edit_text_cursor_ps); + DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.cursor_color, 1); - DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.overlay_color, 1); + /* Selection boxes. */ + state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA; + DRW_PASS_CREATE(psl->edit_text_selection_ps, state | pd->clipping_state); + sh = OVERLAY_shader_uniform_color(); + pd->edit_text_selection_grp = grp = DRW_shgroup_create(sh, psl->edit_text_selection_ps); + DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.selection_color, 1); - state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_MUL | DRW_STATE_DEPTH_GREATER_EQUAL | + /* Highlight text within selection boxes. */ + state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA | DRW_STATE_DEPTH_GREATER_EQUAL | pd->clipping_state; - DRW_PASS_INSTANCE_CREATE(psl->edit_text_darken_ps, psl->edit_text_overlay_ps, state); + DRW_PASS_INSTANCE_CREATE(psl->edit_text_highlight_ps, psl->edit_text_selection_ps, state); } { /* Create view which will render everything (hopefully) behind the text geometry. 
*/ @@ -112,7 +121,7 @@ static void edit_text_cache_populate_select(OVERLAY_Data *vedata, Object *ob) v2_quad_corners_to_mat4(box, final_mat); mul_m4_m4m4(final_mat, ob->obmat, final_mat); - DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, final_mat); + DRW_shgroup_call_obmat(pd->edit_text_selection_grp, geom, final_mat); } } @@ -128,7 +137,7 @@ static void edit_text_cache_populate_cursor(OVERLAY_Data *vedata, Object *ob) mul_m4_m4m4(mat, ob->obmat, mat); struct GPUBatch *geom = DRW_cache_quad_get(); - DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, mat); + DRW_shgroup_call_obmat(pd->edit_text_cursor_grp, geom, mat); } static void edit_text_cache_populate_boxes(OVERLAY_Data *vedata, Object *ob) @@ -193,11 +202,18 @@ void OVERLAY_edit_text_draw(OVERLAY_Data *vedata) DRW_view_set_active(pd->view_edit_text); - /* Alpha blended. */ - copy_v4_fl4(pd->edit_text.overlay_color, 0.8f, 0.8f, 0.8f, 0.5f); - DRW_draw_pass(psl->edit_text_overlay_ps); + /* Selection Boxes. */ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_SELECTION, pd->edit_text.selection_color); + srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color); + DRW_draw_pass(psl->edit_text_selection_ps); + + /* Highlight text within selection boxes. */ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_HIGHLIGHT, pd->edit_text.selection_color); + srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color); + DRW_draw_pass(psl->edit_text_highlight_ps); - /* Multiply previous result where depth test fail. */ - copy_v4_fl4(pd->edit_text.overlay_color, 0.0f, 0.0f, 0.0f, 1.0f); - DRW_draw_pass(psl->edit_text_darken_ps); + /* Cursor (text caret). 
*/ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_CURSOR, pd->edit_text.cursor_color); + srgb_to_linearrgb_v4(pd->edit_text.cursor_color, pd->edit_text.cursor_color); + DRW_draw_pass(psl->edit_text_cursor_ps); } diff --git a/source/blender/draw/engines/overlay/overlay_edit_uv.c b/source/blender/draw/engines/overlay/overlay_edit_uv.c index 4cfe9fcea4e..d2737d73333 100644 --- a/source/blender/draw/engines/overlay/overlay_edit_uv.c +++ b/source/blender/draw/engines/overlay/overlay_edit_uv.c @@ -160,7 +160,6 @@ void OVERLAY_edit_uv_init(OVERLAY_Data *vedata) pd->edit_uv.draw_type = sima->dt_uvstretch; BLI_listbase_clear(&pd->edit_uv.totals); pd->edit_uv.total_area_ratio = 0.0f; - pd->edit_uv.total_area_ratio_inv = 0.0f; /* During engine initialization phase the `sima` isn't locked and * we are able to retrieve the needed data. @@ -280,8 +279,6 @@ void OVERLAY_edit_uv_cache_init(OVERLAY_Data *vedata) DRW_shgroup_uniform_block(pd->edit_uv_stretching_grp, "globalsBlock", G_draw.block_ubo); DRW_shgroup_uniform_float( pd->edit_uv_stretching_grp, "totalAreaRatio", &pd->edit_uv.total_area_ratio, 1); - DRW_shgroup_uniform_float( - pd->edit_uv_stretching_grp, "totalAreaRatioInv", &pd->edit_uv.total_area_ratio_inv, 1); } } @@ -411,7 +408,7 @@ void OVERLAY_edit_uv_cache_init(OVERLAY_Data *vedata) draw_ctx->obact->type == OB_MESH) { uint objects_len = 0; Object **objects = BKE_view_layer_array_from_objects_in_mode_unique_data( - draw_ctx->view_layer, NULL, &objects_len, draw_ctx->object_mode); + draw_ctx->scene, draw_ctx->view_layer, NULL, &objects_len, draw_ctx->object_mode); for (uint ob_index = 0; ob_index < objects_len; ob_index++) { Object *object_eval = DEG_get_evaluated_object(draw_ctx->depsgraph, objects[ob_index]); DRW_mesh_batch_cache_validate(object_eval, (Mesh *)object_eval->data); @@ -510,7 +507,6 @@ static void edit_uv_stretching_update_ratios(OVERLAY_Data *vedata) if (total_area > FLT_EPSILON && total_area_uv > FLT_EPSILON) { pd->edit_uv.total_area_ratio = total_area / 
total_area_uv; - pd->edit_uv.total_area_ratio_inv = total_area_uv / total_area; } } BLI_freelistN(&pd->edit_uv.totals); diff --git a/source/blender/draw/engines/overlay/overlay_engine.c b/source/blender/draw/engines/overlay/overlay_engine.c index f8c28394b16..6e2da95e405 100644 --- a/source/blender/draw/engines/overlay/overlay_engine.c +++ b/source/blender/draw/engines/overlay/overlay_engine.c @@ -192,6 +192,8 @@ static void OVERLAY_cache_init(void *vedata) OVERLAY_edit_curves_cache_init(vedata); break; case CTX_MODE_SCULPT_CURVES: + OVERLAY_sculpt_curves_cache_init(vedata); + break; case CTX_MODE_OBJECT: break; default: @@ -308,13 +310,16 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) (pd->ctx_mode == CTX_MODE_PARTICLE); const bool in_paint_mode = (ob == draw_ctx->obact) && (draw_ctx->object_mode & OB_MODE_ALL_PAINT); + const bool in_sculpt_curve_mode = (ob == draw_ctx->obact) && + (draw_ctx->object_mode & OB_MODE_SCULPT_CURVES); const bool in_sculpt_mode = (ob == draw_ctx->obact) && (ob->sculpt != NULL) && (ob->sculpt->mode_type == OB_MODE_SCULPT); + const bool in_curves_sculpt_mode = (ob == draw_ctx->obact) && + (ob->mode == OB_MODE_SCULPT_CURVES); const bool has_surface = ELEM(ob->type, OB_MESH, OB_CURVES_LEGACY, OB_SURF, - OB_MBALL, OB_FONT, OB_GPENCIL, OB_CURVES, @@ -329,8 +334,8 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) const bool draw_bones = (pd->overlay.flag & V3D_OVERLAY_HIDE_BONES) == 0; const bool draw_wires = draw_surface && has_surface && (pd->wireframe_mode || !pd->hide_overlays); - const bool draw_outlines = !in_edit_mode && !in_paint_mode && renderable && has_surface && - !instance_parent_in_edit_mode && + const bool draw_outlines = !in_edit_mode && !in_paint_mode && !in_sculpt_curve_mode && + renderable && has_surface && !instance_parent_in_edit_mode && (pd->v3d_flag & V3D_SELECT_OUTLINE) && (ob->base_flag & BASE_SELECTED); const bool draw_bone_selection = (ob->type == OB_MESH) && 
pd->armature.do_pose_fade_geom && @@ -428,6 +433,9 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) if (in_sculpt_mode) { OVERLAY_sculpt_cache_populate(vedata, ob); } + else if (in_curves_sculpt_mode) { + OVERLAY_sculpt_curves_cache_populate(vedata, ob); + } if (draw_motion_paths) { OVERLAY_motion_path_cache_populate(vedata, ob); @@ -591,6 +599,9 @@ static void OVERLAY_draw_scene(void *vedata) case CTX_MODE_SCULPT: OVERLAY_sculpt_draw(vedata); break; + case CTX_MODE_SCULPT_CURVES: + OVERLAY_sculpt_curves_draw(vedata); + break; case CTX_MODE_EDIT_MESH: case CTX_MODE_POSE: case CTX_MODE_PAINT_WEIGHT: diff --git a/source/blender/draw/engines/overlay/overlay_extra.c b/source/blender/draw/engines/overlay/overlay_extra.c index f875254a685..5d80ab3d0ea 100644 --- a/source/blender/draw/engines/overlay/overlay_extra.c +++ b/source/blender/draw/engines/overlay/overlay_extra.c @@ -1315,9 +1315,14 @@ static void OVERLAY_relationship_lines(OVERLAY_ExtraCallBuffers *cb, if ((curcon->ui_expand_flag & (1 << 0)) && BKE_constraint_targets_get(curcon, &targets)) { bConstraintTarget *ct; + BKE_constraint_custom_object_space_init(cob, curcon); + for (ct = targets.first; ct; ct = ct->next) { /* calculate target's matrix */ - if (cti->get_target_matrix) { + if (ct->flag & CONSTRAINT_TAR_CUSTOM_SPACE) { + copy_m4_m4(ct->matrix, cob->space_obj_world_matrix); + } + else if (cti->get_target_matrix) { cti->get_target_matrix(depsgraph, curcon, cob, ct, DEG_get_ctime(depsgraph)); } else { @@ -1353,7 +1358,7 @@ static void OVERLAY_volume_extra(OVERLAY_ExtraCallBuffers *cb, /* Don't show smoke before simulation starts, this could be made an option in the future. */ const bool draw_velocity = (fds->draw_velocity && fds->fluid && - CFRA >= fds->point_cache[0]->startframe); + scene->r.cfra >= fds->point_cache[0]->startframe); /* Show gridlines only for slices with no interpolation. 
*/ const bool show_gridlines = (fds->show_gridlines && fds->fluid && @@ -1480,11 +1485,12 @@ static void OVERLAY_volume_extra(OVERLAY_ExtraCallBuffers *cb, static void OVERLAY_object_center(OVERLAY_ExtraCallBuffers *cb, Object *ob, OVERLAY_PrivateData *pd, + const Scene *scene, ViewLayer *view_layer) { const bool is_library = ID_REAL_USERS(&ob->id) > 1 || ID_IS_LINKED(ob); - - if (ob == OBACT(view_layer)) { + BKE_view_layer_synced_ensure(scene, view_layer); + if (ob == BKE_view_layer_active_object_get(view_layer)) { DRW_buffer_add_entry(cb->center_active, ob->obmat[3]); } else if (ob->base_flag & BASE_SELECTED) { @@ -1546,8 +1552,9 @@ void OVERLAY_extra_cache_populate(OVERLAY_Data *vedata, Object *ob) (md = BKE_modifiers_findby_type(ob, eModifierType_Fluid)) && (BKE_modifier_is_enabled(scene, md, eModifierMode_Realtime)) && (((FluidModifierData *)md)->domain != NULL) && - (CFRA >= (((FluidModifierData *)md)->domain->cache_frame_start)) && - (CFRA <= (((FluidModifierData *)md)->domain->cache_frame_end)); + (scene->r.cfra >= + (((FluidModifierData *)md)->domain->cache_frame_start)) && + (scene->r.cfra <= (((FluidModifierData *)md)->domain->cache_frame_end)); float *color; int theme_id = DRW_object_wire_theme_get(ob, view_layer, &color); @@ -1567,7 +1574,7 @@ void OVERLAY_extra_cache_populate(OVERLAY_Data *vedata, Object *ob) /* don't show object extras in set's */ if (!from_dupli) { if (draw_obcenters) { - OVERLAY_object_center(cb, ob, pd, view_layer); + OVERLAY_object_center(cb, ob, pd, scene, view_layer); } if (draw_relations) { OVERLAY_relationship_lines(cb, draw_ctx->depsgraph, draw_ctx->scene, ob); diff --git a/source/blender/draw/engines/overlay/overlay_outline.c b/source/blender/draw/engines/overlay/overlay_outline.c index eea9a1a1bef..f2e2acc98a9 100644 --- a/source/blender/draw/engines/overlay/overlay_outline.c +++ b/source/blender/draw/engines/overlay/overlay_outline.c @@ -133,6 +133,10 @@ void OVERLAY_outline_cache_init(OVERLAY_Data *vedata) 
pd->outlines_gpencil_grp = grp = DRW_shgroup_create(sh_gpencil, psl->outlines_prepass_ps); DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0); DRW_shgroup_uniform_float_copy(grp, "gpStrokeIndexOffset", 0.0); + + GPUShader *sh_curves = OVERLAY_shader_outline_prepass_curves(); + pd->outlines_curves_grp = grp = DRW_shgroup_create(sh_curves, psl->outlines_prepass_ps); + DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0); } /* outlines_prepass_ps is still needed for selection of probes. */ @@ -267,6 +271,12 @@ static void OVERLAY_outline_volume(OVERLAY_PrivateData *pd, Object *ob) DRW_shgroup_call(shgroup, geom, ob); } +static void OVERLAY_outline_curves(OVERLAY_PrivateData *pd, Object *ob) +{ + DRWShadingGroup *shgroup = pd->outlines_curves_grp; + DRW_shgroup_curves_create_sub(ob, shgroup, NULL); +} + void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata, Object *ob, OVERLAY_DupliData *dupli, @@ -293,6 +303,11 @@ void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata, return; } + if (ob->type == OB_CURVES) { + OVERLAY_outline_curves(pd, ob); + return; + } + if (ob->type == OB_POINTCLOUD && pd->wireframe_mode) { /* Looks bad in this case. Could be relaxed if we draw a * wireframe of some sort in the future. 
*/ diff --git a/source/blender/draw/engines/overlay/overlay_private.h b/source/blender/draw/engines/overlay/overlay_private.h index 23c20a186a0..0a783c44029 100644 --- a/source/blender/draw/engines/overlay/overlay_private.h +++ b/source/blender/draw/engines/overlay/overlay_private.h @@ -78,8 +78,9 @@ typedef struct OVERLAY_PassList { DRWPass *edit_mesh_analysis_ps; DRWPass *edit_mesh_normals_ps; DRWPass *edit_particle_ps; - DRWPass *edit_text_overlay_ps; - DRWPass *edit_text_darken_ps; + DRWPass *edit_text_cursor_ps; + DRWPass *edit_text_selection_ps; + DRWPass *edit_text_highlight_ps; DRWPass *edit_text_wire_ps[2]; DRWPass *edit_uv_edges_ps; DRWPass *edit_uv_verts_ps; @@ -116,6 +117,7 @@ typedef struct OVERLAY_PassList { DRWPass *particle_ps; DRWPass *pointcloud_ps; DRWPass *sculpt_mask_ps; + DRWPass *sculpt_curves_selection_ps; DRWPass *volume_ps; DRWPass *wireframe_ps; DRWPass *wireframe_xray_ps; @@ -251,7 +253,8 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *edit_mesh_analysis_grp; DRWShadingGroup *edit_particle_strand_grp; DRWShadingGroup *edit_particle_point_grp; - DRWShadingGroup *edit_text_overlay_grp; + DRWShadingGroup *edit_text_cursor_grp; + DRWShadingGroup *edit_text_selection_grp; DRWShadingGroup *edit_text_wire_grp[2]; DRWShadingGroup *edit_uv_verts_grp; DRWShadingGroup *edit_uv_edges_grp; @@ -267,6 +270,7 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *motion_path_lines_grp; DRWShadingGroup *motion_path_points_grp; DRWShadingGroup *outlines_grp; + DRWShadingGroup *outlines_curves_grp; DRWShadingGroup *outlines_ptcloud_grp; DRWShadingGroup *outlines_gpencil_grp; DRWShadingGroup *paint_depth_grp; @@ -279,6 +283,7 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *particle_shapes_grp; DRWShadingGroup *pointcloud_dots_grp; DRWShadingGroup *sculpt_mask_grp; + DRWShadingGroup *sculpt_curves_selection_grp; DRWShadingGroup *volume_selection_surface_grp; DRWShadingGroup *wires_grp[2][2]; /* With and without coloring. 
*/ DRWShadingGroup *wires_all_grp[2][2]; /* With and without coloring. */ @@ -335,7 +340,8 @@ typedef struct OVERLAY_PrivateData { int handle_display; } edit_curve; struct { - float overlay_color[4]; + float cursor_color[4]; + float selection_color[4]; } edit_text; struct { bool do_zbufclip; @@ -381,7 +387,6 @@ typedef struct OVERLAY_PrivateData { eSpaceImage_UVDT_Stretch draw_type; ListBase totals; float total_area_ratio; - float total_area_ratio_inv; /* stencil overlay */ struct Image *stencil_image; @@ -669,6 +674,10 @@ void OVERLAY_sculpt_cache_init(OVERLAY_Data *vedata); void OVERLAY_sculpt_cache_populate(OVERLAY_Data *vedata, Object *ob); void OVERLAY_sculpt_draw(OVERLAY_Data *vedata); +void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata); +void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *ob); +void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata); + void OVERLAY_wireframe_init(OVERLAY_Data *vedata); void OVERLAY_wireframe_cache_init(OVERLAY_Data *vedata); void OVERLAY_wireframe_cache_populate(OVERLAY_Data *vedata, @@ -737,6 +746,7 @@ GPUShader *OVERLAY_shader_motion_path_line(void); GPUShader *OVERLAY_shader_motion_path_vert(void); GPUShader *OVERLAY_shader_uniform_color(void); GPUShader *OVERLAY_shader_outline_prepass(bool use_wire); +GPUShader *OVERLAY_shader_outline_prepass_curves(void); GPUShader *OVERLAY_shader_outline_prepass_gpencil(void); GPUShader *OVERLAY_shader_outline_prepass_pointcloud(void); GPUShader *OVERLAY_shader_extra_grid(void); @@ -750,6 +760,7 @@ GPUShader *OVERLAY_shader_paint_wire(void); GPUShader *OVERLAY_shader_particle_dot(void); GPUShader *OVERLAY_shader_particle_shape(void); GPUShader *OVERLAY_shader_sculpt_mask(void); +GPUShader *OVERLAY_shader_sculpt_curves_selection(void); GPUShader *OVERLAY_shader_volume_velocity(bool use_needle, bool use_mac); GPUShader *OVERLAY_shader_volume_gridlines(bool color_with_flags, bool color_range); GPUShader *OVERLAY_shader_wireframe(bool custom_bias); diff --git 
a/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc new file mode 100644 index 00000000000..b8021124f27 --- /dev/null +++ b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw_engine + */ + +#include "DRW_render.h" + +#include "draw_cache_impl.h" +#include "overlay_private.h" + +#include "BKE_curves.hh" + +void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata) +{ + OVERLAY_PassList *psl = vedata->psl; + OVERLAY_PrivateData *pd = vedata->stl->pd; + + const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_BLEND_ALPHA; + DRW_PASS_CREATE(psl->sculpt_curves_selection_ps, state | pd->clipping_state); + + GPUShader *sh = OVERLAY_shader_sculpt_curves_selection(); + pd->sculpt_curves_selection_grp = DRW_shgroup_create(sh, psl->sculpt_curves_selection_ps); + DRWShadingGroup *grp = pd->sculpt_curves_selection_grp; + + /* Reuse the same mask opacity from sculpt mode, since it wasn't worth it to add a different + * property yet. */ + DRW_shgroup_uniform_float_copy(grp, "selection_opacity", pd->overlay.sculpt_mode_mask_opacity); +} + +static bool everything_selected(const Curves &curves_id) +{ + if (!(curves_id.flag & CV_SCULPT_SELECTION_ENABLED)) { + /* When the selection is disabled, conceptually everything is selected. 
*/ + return true; + } + const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap( + curves_id.geometry); + blender::VArray<float> selection; + switch (curves_id.selection_domain) { + case ATTR_DOMAIN_POINT: + selection = curves.selection_point_float(); + break; + case ATTR_DOMAIN_CURVE: + selection = curves.selection_curve_float(); + break; + } + return selection.is_single() && selection.get_internal_single() == 1.0f; +} + +void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object) +{ + OVERLAY_PrivateData *pd = vedata->stl->pd; + Curves *curves = static_cast<Curves *>(object->data); + + /* As an optimization, return early if everything is selected. */ + if (everything_selected(*curves)) { + return; + } + + /* Retrieve the location of the texture. */ + const char *name = curves->selection_domain == ATTR_DOMAIN_POINT ? ".selection_point_float" : + ".selection_curve_float"; + + bool is_point_domain; + GPUTexture **texture = DRW_curves_texture_for_evaluated_attribute( + curves, name, &is_point_domain); + if (texture == nullptr) { + return; + } + + /* Evaluate curves and their attributes if necessary. */ + DRWShadingGroup *grp = DRW_shgroup_curves_create_sub( + object, pd->sculpt_curves_selection_grp, nullptr); + if (*texture == nullptr) { + return; + } + + DRW_shgroup_uniform_bool_copy(grp, "is_point_domain", is_point_domain); + DRW_shgroup_uniform_texture(grp, "selection_tx", *texture); +} + +void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata) +{ + OVERLAY_PassList *psl = vedata->psl; + OVERLAY_PrivateData *pd = vedata->stl->pd; + OVERLAY_FramebufferList *fbl = vedata->fbl; + + if (DRW_state_is_fbo()) { + GPU_framebuffer_bind(pd->painting.in_front ? 
fbl->overlay_in_front_fb : + fbl->overlay_default_fb); + } + + DRW_draw_pass(psl->sculpt_curves_selection_ps); +} diff --git a/source/blender/draw/engines/overlay/overlay_shader.c b/source/blender/draw/engines/overlay/overlay_shader.c index 48146fbddfb..2373363ab9d 100644 --- a/source/blender/draw/engines/overlay/overlay_shader.c +++ b/source/blender/draw/engines/overlay/overlay_shader.c @@ -76,6 +76,7 @@ typedef struct OVERLAY_Shaders { GPUShader *motion_path_line; GPUShader *motion_path_vert; GPUShader *outline_prepass; + GPUShader *outline_prepass_curves; GPUShader *outline_prepass_gpencil; GPUShader *outline_prepass_pointcloud; GPUShader *outline_prepass_wire; @@ -90,6 +91,7 @@ typedef struct OVERLAY_Shaders { GPUShader *particle_shape; GPUShader *pointcloud_dot; GPUShader *sculpt_mask; + GPUShader *sculpt_curves_selection; GPUShader *uniform_color; GPUShader *volume_velocity_needle_sh; GPUShader *volume_velocity_mac_sh; @@ -650,6 +652,18 @@ GPUShader *OVERLAY_shader_outline_prepass(bool use_wire) return use_wire ? sh_data->outline_prepass_wire : sh_data->outline_prepass; } +GPUShader *OVERLAY_shader_outline_prepass_curves() +{ + const DRWContextState *draw_ctx = DRW_context_state_get(); + OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg]; + if (!sh_data->outline_prepass_curves) { + sh_data->outline_prepass_curves = GPU_shader_create_from_info_name( + draw_ctx->sh_cfg ? 
"overlay_outline_prepass_curves_clipped" : + "overlay_outline_prepass_curves"); + } + return sh_data->outline_prepass_curves; +} + GPUShader *OVERLAY_shader_outline_prepass_gpencil(void) { const DRWContextState *draw_ctx = DRW_context_state_get(); @@ -792,6 +806,18 @@ GPUShader *OVERLAY_shader_sculpt_mask(void) return sh_data->sculpt_mask; } +GPUShader *OVERLAY_shader_sculpt_curves_selection(void) +{ + const DRWContextState *draw_ctx = DRW_context_state_get(); + OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg]; + if (!sh_data->sculpt_curves_selection) { + sh_data->sculpt_curves_selection = GPU_shader_create_from_info_name( + draw_ctx->sh_cfg == GPU_SHADER_CFG_CLIPPED ? "overlay_sculpt_curves_selection_clipped" : + "overlay_sculpt_curves_selection"); + } + return sh_data->sculpt_curves_selection; +} + struct GPUShader *OVERLAY_shader_uniform_color(void) { const DRWContextState *draw_ctx = DRW_context_state_get(); diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh index 58f96110887..9396a6d3f2f 100644 --- a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh +++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh @@ -293,7 +293,6 @@ GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_area) .do_static_compilation(true) .vertex_in(1, Type::FLOAT, "ratio") .push_constant(Type::FLOAT, "totalAreaRatio") - .push_constant(Type::FLOAT, "totalAreaRatioInv") .additional_info("overlay_edit_uv_stretching"); GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_angle) diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh index 6f6a9c1622d..288fb3b3cbd 100644 --- a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh +++ 
b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh @@ -29,6 +29,16 @@ GPU_SHADER_CREATE_INFO(overlay_outline_prepass_mesh_clipped) GPU_SHADER_INTERFACE_INFO(overlay_outline_prepass_wire_iface, "vert").flat(Type::VEC3, "pos"); +GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves) + .do_static_compilation(true) + .vertex_source("overlay_outline_prepass_curves_vert.glsl") + .additional_info("draw_hair", "overlay_outline_prepass") + .additional_info("draw_object_infos"); + +GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves_clipped) + .do_static_compilation(true) + .additional_info("overlay_outline_prepass_curves", "drw_clipped"); + GPU_SHADER_CREATE_INFO(overlay_outline_prepass_wire) .do_static_compilation(true) .define("USE_GEOM") diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh new file mode 100644 index 00000000000..46e3943b293 --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(overlay_sculpt_curves_selection_iface, "") + .smooth(Type::FLOAT, "mask_weight"); + +GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection) + .do_static_compilation(true) + .push_constant(Type::BOOL, "is_point_domain") + .push_constant(Type::FLOAT, "selection_opacity") + .sampler(0, ImageType::FLOAT_BUFFER, "selection_tx") + .vertex_out(overlay_sculpt_curves_selection_iface) + .vertex_source("overlay_sculpt_curves_selection_vert.glsl") + .fragment_source("overlay_sculpt_curves_selection_frag.glsl") + .fragment_out(0, Type::VEC4, "out_color") + .additional_info("draw_hair", "draw_globals"); + +GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection_clipped) + .do_static_compilation(true) + .additional_info("overlay_sculpt_curves_selection", "drw_clipped"); 
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl index f28a809fdab..606292bbe83 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl @@ -96,7 +96,7 @@ void main() float dist_raw = texelFetch(lineTex, center_texel, 0).b; float dist = decode_line_dist(dist_raw); - /* TODO: Opti: use textureGather. */ + /* TODO: Optimization: use textureGather. */ vec4 neightbor_col0 = texelFetchOffset(colorTex, center_texel, 0, ivec2(1, 0)); vec4 neightbor_col1 = texelFetchOffset(colorTex, center_texel, 0, ivec2(-1, 0)); vec4 neightbor_col2 = texelFetchOffset(colorTex, center_texel, 0, ivec2(0, 1)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl index 0a8e279e9b0..ca5a6aff2ca 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl @@ -17,8 +17,8 @@ vec2 compute_dir(vec2 v0, vec2 v1, vec2 v2) mat3 compute_mat(vec4 sphere, vec3 bone_vec, out float z_ofs) { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); - vec3 cam_ray = (is_persp) ? sphere.xyz - ViewMatrixInverse[3].xyz : -ViewMatrixInverse[2].xyz; + bool is_persp = (drw_view.winmat[3][3] == 0.0); + vec3 cam_ray = (is_persp) ? sphere.xyz - drw_view.viewinv[3].xyz : -drw_view.viewinv[2].xyz; /* Sphere center distance from the camera (persp) in world space. 
*/ float cam_dist = length(cam_ray); @@ -88,13 +88,13 @@ vec3 get_outline_point(vec2 pos, void main() { - float dst_head = distance(headSphere.xyz, ViewMatrixInverse[3].xyz); - float dst_tail = distance(tailSphere.xyz, ViewMatrixInverse[3].xyz); - // float dst_head = -dot(headSphere.xyz, ViewMatrix[2].xyz); - // float dst_tail = -dot(tailSphere.xyz, ViewMatrix[2].xyz); + float dst_head = distance(headSphere.xyz, drw_view.viewinv[3].xyz); + float dst_tail = distance(tailSphere.xyz, drw_view.viewinv[3].xyz); + // float dst_head = -dot(headSphere.xyz, drw_view.viewmat[2].xyz); + // float dst_tail = -dot(tailSphere.xyz, drw_view.viewmat[2].xyz); vec4 sph_near, sph_far; - if ((dst_head > dst_tail) && (ProjectionMatrix[3][3] == 0.0)) { + if ((dst_head > dst_tail) && (drw_view.winmat[3][3] == 0.0)) { sph_near = tailSphere; sph_far = headSphere; } @@ -130,7 +130,7 @@ void main() gl_Position = p1; /* compute position from 3 vertex because the change in direction - * can happen very quicky and lead to very thin edges. */ + * can happen very quickly and lead to very thin edges. 
*/ vec2 ss0 = proj(p0); vec2 ss1 = proj(p1); vec2 ss2 = proj(p2); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl index 2dd86a57dfd..4d21ffd96b5 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl @@ -30,7 +30,7 @@ void main() sp = bone_mat * sp.xzy + headSphere.xyz; nor = bone_mat * nor.xzy; - normalView = mat3(ViewMatrix) * nor; + normalView = mat3(drw_view.viewmat) * nor; finalStateColor = stateColor; finalBoneColor = boneColor; @@ -38,5 +38,5 @@ void main() view_clipping_distances(sp); vec4 pos_4d = vec4(sp, 1.0); - gl_Position = ViewProjectionMatrix * pos_4d; + gl_Position = drw_view.persmat * pos_4d; } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl index 47c5dada708..b485b0a7807 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl @@ -5,7 +5,7 @@ void main(void) { finalColor = vec4(geom_in[0].vColSize.rgb, 1.0); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); vec3 view_vec = (is_persp) ? 
normalize(geom_in[1].vPos) : vec3(0.0, 0.0, -1.0); vec3 v10 = geom_in[0].vPos - geom_in[1].vPos; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl index 29319b3f7ac..91eb6265192 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl @@ -14,10 +14,10 @@ void main() mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); vec4 world_pos = model_mat * vec4(pos, 1.0); - vec4 view_pos = ViewMatrix * world_pos; + vec4 view_pos = drw_view.viewmat * world_pos; geom_in.vPos = view_pos.xyz; - geom_in.pPos = ProjectionMatrix * view_pos; + geom_in.pPos = drw_view.winmat * view_pos; geom_in.inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl index cdbe8c3d7df..68f7e75673f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl @@ -25,7 +25,7 @@ void main() finalColor.a = 1.0; vec4 world_pos = model_mat * vec4(pos, 1.0); - gl_Position = ViewProjectionMatrix * world_pos; + gl_Position = drw_view.persmat * world_pos; view_clipping_distances(world_pos.xyz); } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl index 31369e0c3df..4d79fab718f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl +++ 
b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl @@ -13,10 +13,10 @@ void main() vec4 bone_color, state_color; mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); - mat4 model_view_matrix = ViewMatrix * model_mat; + mat4 model_view_matrix = drw_view.viewmat * model_mat; mat4 sphereMatrix = inverse(model_view_matrix); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); /* This is the local space camera ray (not normalize). * In perspective mode it's also the viewspace position @@ -58,8 +58,8 @@ void main() vec3 cam_pos0 = x_axis * pos.x + y_axis * pos.y + z_axis * z_ofs; vec4 V = model_view_matrix * vec4(cam_pos0, 1.0); - gl_Position = ProjectionMatrix * V; - vec4 center = ProjectionMatrix * vec4(model_view_matrix[3].xyz, 1.0); + gl_Position = drw_view.winmat * V; + vec4 center = drw_view.winmat * vec4(model_view_matrix[3].xyz, 1.0); /* Offset away from the center to avoid overlap with solid shape. */ vec2 ofs_dir = normalize(proj(gl_Position) - proj(center)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl index e60b6e94492..150701b78df 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl @@ -5,7 +5,7 @@ void main() { const float sphere_radius = 0.05; - bool is_perp = (ProjectionMatrix[3][3] == 0.0); + bool is_perp = (drw_view.winmat[3][3] == 0.0); vec3 ray_ori_view = (is_perp) ? vec3(0.0) : viewPosition.xyz; vec3 ray_dir_view = (is_perp) ? 
viewPosition : vec3(0.0, 0.0, -1.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl index abbaad8cd10..3d2dfc018bb 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl @@ -10,10 +10,10 @@ void main() vec4 bone_color, state_color; mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); - mat4 model_view_matrix = ViewMatrix * model_mat; + mat4 model_view_matrix = drw_view.viewmat * model_mat; sphereMatrix = inverse(model_view_matrix); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); /* This is the local space camera ray (not normalize). * In perspective mode it's also the viewspace position @@ -65,7 +65,7 @@ void main() vec4 pos_4d = vec4(cam_pos, 1.0); vec4 V = model_view_matrix * pos_4d; - gl_Position = ProjectionMatrix * V; + gl_Position = drw_view.winmat * V; viewPosition = V.xyz; finalStateColor = state_color.xyz; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl index b5edcd2858b..e7917a46312 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl @@ -31,12 +31,12 @@ void main() vec4 boneStart_4d = vec4(boneStart, 1.0); vec4 boneEnd_4d = vec4(boneEnd, 1.0); - vec4 v0 = ViewMatrix * boneStart_4d; - vec4 v1 = ViewMatrix * boneEnd_4d; + vec4 v0 = drw_view.viewmat * boneStart_4d; + vec4 v1 = drw_view.viewmat * boneEnd_4d; /* Clip the bone to the camera origin plane (not the clip plane) * to avoid glitches if one end is behind the camera origin (in persp). 
*/ - float clip_dist = (ProjectionMatrix[3][3] == 0.0) ? + float clip_dist = (drw_view.winmat[3][3] == 0.0) ? -1e-7 : 1e20; /* hardcoded, -1e-8 is giving gliches. */ vec3 bvec = v1.xyz - v0.xyz; @@ -48,8 +48,8 @@ void main() v1.xyz = clip_pt; } - vec4 p0 = ProjectionMatrix * v0; - vec4 p1 = ProjectionMatrix * v1; + vec4 p0 = drw_view.winmat * v0; + vec4 p1 = drw_view.winmat * v1; float h = (is_head) ? p0.w : p1.w; @@ -58,7 +58,7 @@ void main() /* 2D screen aligned pos at the point */ vec2 vpos = pos.x * x_screen_vec + pos.y * y_screen_vec; - vpos *= (ProjectionMatrix[3][3] == 0.0) ? h : 1.0; + vpos *= (drw_view.winmat[3][3] == 0.0) ? h : 1.0; vpos *= (do_wire) ? 1.0 : 0.5; if (finalInnerColor.a > 0.0) { diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl index 6ff8d0665d1..dc5c43f417e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl @@ -45,7 +45,7 @@ void main() if (gl_VertexID == 0) { if (isConstantScreenSizeNormals) { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); if (is_persp) { float dist_fac = length(cameraPos - world_pos); float cos_fac = dot(cameraForward, cameraVec(world_pos)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl index f1fbdac7847..76a944c6987 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl @@ -9,7 +9,7 @@ void main() vec3 up = normalize(imat * screenVecs[1].xyz); vec3 screen_pos = (right * pos.x + up * pos.z) * size; vec4 pos_4d = ModelMatrix * vec4(local_pos + screen_pos, 1.0); - 
gl_Position = ViewProjectionMatrix * pos_4d; + gl_Position = drw_view.persmat * pos_4d; /* Manual stipple: one segment out of 2 is transparent. */ finalColor = ((gl_VertexID & 1) == 0) ? colorSkinRoot : vec4(0.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl index 374fb50af75..a50bc5e6e68 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl @@ -74,7 +74,7 @@ void main() finalColor = EDIT_MESH_facedot_color(norAndFlag.w); /* Bias Facedot Z position in clipspace. */ - gl_Position.z -= (ProjectionMatrix[3][3] == 0.0) ? 0.00035 : 1e-6; + gl_Position.z -= (drw_view.winmat[3][3] == 0.0) ? 0.00035 : 1e-6; gl_PointSize = sizeFaceDot; bool occluded = test_occlusion(); @@ -87,7 +87,7 @@ void main() /* Facing based color blend */ vec3 vpos = point_world_to_view(world_pos); vec3 view_normal = normalize(normal_object_to_view(vnor) + 1e-4); - vec3 view_vec = (ProjectionMatrix[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0); + vec3 view_vec = (drw_view.winmat[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0); float facing = dot(view_vec, view_normal); facing = 1.0 - abs(facing) * 0.2; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl index bb086e8d9f5..9a3036d5940 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl @@ -55,9 +55,9 @@ float angle_normalized_v2v2(vec2 v1, vec2 v2) return (q) ? a : M_PI - a; } -float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio) +float area_ratio_to_stretch(float ratio, float tot_ratio) { - ratio *= (ratio > 0.0f) ? 
tot_ratio : -inv_tot_ratio; + ratio *= tot_ratio; return (ratio > 1.0f) ? (1.0f / ratio) : ratio; } @@ -74,7 +74,7 @@ void main() stretch = stretch; stretch = 1.0 - stretch * stretch; #else - float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio, totalAreaRatioInv); + float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio); #endif diff --git a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl index b2578970c9b..acaf04219c0 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl @@ -198,8 +198,8 @@ void main() vec3 edge = obmat[3].xyz - world_pos; vec3 n0 = normalize(cross(edge, p0 - world_pos)); vec3 n1 = normalize(cross(edge, world_pos - p1)); - bool persp = (ProjectionMatrix[3][3] == 0.0); - vec3 V = (persp) ? normalize(ViewMatrixInverse[3].xyz - world_pos) : ViewMatrixInverse[2].xyz; + bool persp = (drw_view.winmat[3][3] == 0.0); + vec3 V = (persp) ? normalize(drw_view.viewinv[3].xyz - world_pos) : drw_view.viewinv[2].xyz; /* Discard non-silhouette edges. */ bool facing0 = dot(n0, V) > 0.0; bool facing1 = dot(n1, V) > 0.0; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl index 25f4984f119..54a4231590e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl @@ -53,7 +53,7 @@ void main() P += cameraPos * plane_axes; float dist, fade; - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; if (is_persp) { vec3 V = cameraPos - P; dist = length(V); @@ -83,7 +83,7 @@ void main() dist = 1.0; /* Avoid branch after. 
*/ if (flag_test(grid_flag, PLANE_XY)) { - float angle = 1.0 - abs(ViewMatrixInverse[2].z); + float angle = 1.0 - abs(drw_view.viewinv[2].z); dist = 1.0 + angle * 2.0; angle *= angle; fade *= 1.0 - angle * angle; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl index b81f1a24358..b43b1eb4a52 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl @@ -39,5 +39,5 @@ void main() local_pos.z = clamp(local_pos.z, -1.0, 0.0); } - gl_Position = ViewProjectionMatrix * vec4(real_pos, 1.0); + gl_Position = drw_view.persmat * vec4(real_pos, 1.0); } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl index 29346a44863..25e13e7c212 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl @@ -15,7 +15,7 @@ void main(void) vec2 edge_dir = compute_dir(interp_in[0].ss_pos, interp_in[1].ss_pos) * drw_view.viewport_size_inverse; - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); float line_size = float(lineThickness) * sizePixel; view_clipping_distances_set(gl_in[0]); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl index bc74a436f5e..e6281f75b8f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl @@ -18,7 +18,7 @@ vec2 proj(vec4 pos) void main() { - gl_Position = ViewProjectionMatrix * vec4(pos, 1.0); + gl_Position = drw_view.persmat * vec4(pos, 1.0); 
interp.ss_pos = proj(gl_Position); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl index 5027525b9b3..70892954cd8 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl @@ -9,7 +9,7 @@ void main() { - gl_Position = ViewProjectionMatrix * vec4(pos, 1.0); + gl_Position = drw_view.persmat * vec4(pos, 1.0); gl_PointSize = float(pointSize + 2); int frame = gl_VertexID + cacheStart; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl new file mode 100644 index 00000000000..f9ec475d21f --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl @@ -0,0 +1,81 @@ + +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) + +uint outline_colorid_get(void) +{ + int flag = int(abs(ObjectInfo.w)); + bool is_active = (flag & DRW_BASE_ACTIVE) != 0; + + if (isTransform) { + return 0u; /* colorTransform */ + } + else if (is_active) { + return 3u; /* colorActive */ + } + else { + return 1u; /* colorSelect */ + } + + return 0u; +} + +/* Replace top 2 bits (of the 16bit output) by outlineId. + * This leaves 16K different IDs to create outlines between objects. 
+ vec3 world_pos = point_object_to_world(pos); + * SHIFT = (32 - (16 - 2)) */ +#define SHIFT 18u + +void main() +{ + bool is_persp = (drw_view.winmat[3][3] == 0.0); + float time, thickness; + vec3 center_wpos, tan, binor; + + hair_get_center_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + drw_view.viewinv[3].xyz, + drw_view.viewinv[2].xyz, + center_wpos, + tan, + binor, + time, + thickness); + vec3 world_pos; + if (hairThicknessRes > 1) { + /* Calculate the thickness, thicktime, worldpos taken into account the outline. */ + float outline_width = point_world_to_ndc(center_wpos).w * 1.25 * + drw_view.viewport_size_inverse.y * drw_view.wininv[1][1]; + thickness += outline_width; + float thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1); + thick_time = thickness * (thick_time * 2.0 - 1.0); + /* Take object scale into account. + * NOTE: This only works fine with uniform scaling. */ + float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor); + world_pos = center_wpos + binor * thick_time * scale; + } + else { + world_pos = center_wpos; + } + + gl_Position = point_world_to_ndc(world_pos); + +#ifdef USE_GEOM + vert.pos = point_world_to_view(world_pos); +#endif + + /* Small bias to always be on top of the geom. */ + gl_Position.z -= 1e-3; + + /* ID 0 is nothing (background) */ + interp.ob_id = uint(resource_handle + 1); + + /* Should be 2 bits only [0..3]. */ + uint outline_id = outline_colorid_get(); + + /* Combine for 16bit uint target. 
*/ + interp.ob_id = (outline_id << 14u) | ((interp.ob_id << SHIFT) >> SHIFT); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl index 8a196620af9..5e0074e9f0b 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl @@ -11,7 +11,7 @@ void vert_from_gl_in(int v) void main() { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); vec3 view_vec = (is_persp) ? normalize(vert[1].pos) : vec3(0.0, 0.0, -1.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl index b6d5cd96c12..92be9ec3bcb 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl @@ -22,14 +22,14 @@ void main() if (!gpStrokeOrder3d) { /* Stroke order 2D. Project to gpDepthPlane. */ - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; vec2 uvs = vec2(gl_FragCoord.xy) * drw_view.viewport_size_inverse; vec3 pos_ndc = vec3(uvs, gl_FragCoord.z) * 2.0 - 1.0; - vec4 pos_world = ViewProjectionMatrixInverse * vec4(pos_ndc, 1.0); + vec4 pos_world = drw_view.persinv * vec4(pos_ndc, 1.0); vec3 pos = pos_world.xyz / pos_world.w; vec3 ray_ori = pos; - vec3 ray_dir = (is_persp) ? (ViewMatrixInverse[3].xyz - pos) : ViewMatrixInverse[2].xyz; + vec3 ray_dir = (is_persp) ? 
(drw_view.viewinv[3].xyz - pos) : drw_view.viewinv[2].xyz; vec3 isect = ray_plane_intersection(ray_ori, ray_dir, gpDepthPlane); vec4 ndc = point_world_to_ndc(isect); gl_FragDepth = (ndc.z / ndc.w) * 0.5 + 0.5; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl index fb981a8167a..c48e7cce550 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl @@ -23,7 +23,7 @@ void main() #ifdef USE_DOTS gl_Position = point_world_to_ndc(world_pos); /* World sized points. */ - gl_PointSize = sizePixel * draw_size * ProjectionMatrix[1][1] * sizeViewport.y / gl_Position.w; + gl_PointSize = sizePixel * draw_size * drw_view.winmat[1][1] * sizeViewport.y / gl_Position.w; #else if ((vclass & VCLASS_SCREENALIGNED) != 0) { diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl new file mode 100644 index 00000000000..7af6bdb9fdb --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl @@ -0,0 +1,5 @@ + +void main() +{ + out_color = vec4(vec3(0.0), 1.0 - mask_weight); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl new file mode 100644 index 00000000000..7be3c8e6dfb --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl @@ -0,0 +1,34 @@ +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +float retrieve_selection() +{ + if (is_point_domain) { + return texelFetch(selection_tx, hair_get_base_id()).r; + } + return 
texelFetch(selection_tx, hair_get_strand_id()).r; +} + +void main() +{ + bool is_persp = (ProjectionMatrix[3][3] == 0.0); + float time, thick_time, thickness; + vec3 world_pos, tan, binor; + hair_get_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + ViewMatrixInverse[3].xyz, + ViewMatrixInverse[2].xyz, + world_pos, + tan, + binor, + time, + thickness, + thick_time); + + gl_Position = point_world_to_ndc(world_pos); + + mask_weight = 1.0 - (selection_opacity - retrieve_selection() * selection_opacity); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl index 0a498471b46..e1a4a3602e3 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl @@ -1,4 +1,4 @@ void main() { fragColor = color; -}
\ No newline at end of file +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl index 41bd7791dd7..d189ab1b72c 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl @@ -72,7 +72,7 @@ void wire_object_color_get(out vec3 rim_col, out vec3 wire_col) void main() { bool no_attr = all(equal(nor, vec3(0))); - vec3 wnor = no_attr ? ViewMatrixInverse[2].xyz : normalize(normal_object_to_world(nor)); + vec3 wnor = no_attr ? drw_view.viewinv[2].xyz : normalize(normal_object_to_world(nor)); vec3 wpos = point_object_to_world(pos); if (isHair) { @@ -81,8 +81,8 @@ void main() wnor = -normalize(mat3(obmat) * nor); } - bool is_persp = (ProjectionMatrix[3][3] == 0.0); - vec3 V = (is_persp) ? normalize(ViewMatrixInverse[3].xyz - wpos) : ViewMatrixInverse[2].xyz; + bool is_persp = (drw_view.winmat[3][3] == 0.0); + vec3 V = (is_persp) ? normalize(drw_view.viewinv[3].xyz - wpos) : drw_view.viewinv[2].xyz; float facing = dot(wnor, V); diff --git a/source/blender/draw/engines/select/select_engine.c b/source/blender/draw/engines/select/select_engine.c index 88ae5ac707e..026a1f52ac1 100644 --- a/source/blender/draw/engines/select/select_engine.c +++ b/source/blender/draw/engines/select/select_engine.c @@ -201,7 +201,7 @@ static void select_cache_populate(void *vedata, Object *ob) if (!e_data.context.is_dirty && sel_data && sel_data->is_drawn) { /* The object indices have already been drawn. Fill depth pass. - * Opti: Most of the time this depth pass is not used. */ + * Optimization: Most of the time this depth pass is not used. 
*/ struct Mesh *me = ob->data; if (e_data.context.select_mode & SCE_SELECT_FACE) { struct GPUBatch *geom_faces = DRW_mesh_batch_cache_get_triangles_with_select_id(me); diff --git a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl index 880f17b0c9d..e7ca868a4ff 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl @@ -31,9 +31,9 @@ void cavity_compute(vec2 screenco, /* find the offset in screen space by multiplying a point * in camera space at the depth of the point by the projection matrix. */ vec2 offset; - float homcoord = ProjectionMatrix[2][3] * position.z + ProjectionMatrix[3][3]; - offset.x = ProjectionMatrix[0][0] * world_data.cavity_distance / homcoord; - offset.y = ProjectionMatrix[1][1] * world_data.cavity_distance / homcoord; + float homcoord = drw_view.winmat[2][3] * position.z + drw_view.winmat[3][3]; + offset.x = drw_view.winmat[0][0] * world_data.cavity_distance / homcoord; + offset.y = drw_view.winmat[1][1] * world_data.cavity_distance / homcoord; /* convert from -1.0...1.0 range to 0.0..1.0 for easy use with texture coordinates */ offset *= 0.5; diff --git a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl index d8f8a1cc03f..11d7c85d43a 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl @@ -16,7 +16,7 @@ (dof_aperturesize * (dof_distance / zdepth - 1.0) * dof_invsensorsize) #define linear_depth(z) \ - ((ProjectionMatrix[3][3] == 0.0) ? \ + ((drw_view.winmat[3][3] == 0.0) ? 
\ (nearFar.x * nearFar.y) / (z * (nearFar.x - nearFar.y) + nearFar.y) : \ (z * 2.0 - 1.0) * nearFar.y) diff --git a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl index cfc94ef7c9a..04fef8d8b32 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl @@ -43,13 +43,13 @@ void workbench_hair_random_material(float rand, void main() { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); float time, thick_time, thickness; vec3 world_pos, tan, binor; hair_get_pos_tan_binor_time(is_persp, ModelMatrixInverse, - ViewMatrixInverse[3].xyz, - ViewMatrixInverse[2].xyz, + drw_view.viewinv[3].xyz, + drw_view.viewinv[2].xyz, world_pos, tan, binor, diff --git a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl index d8f1b83d747..213279b1913 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl @@ -24,7 +24,7 @@ float linear_zdepth(float depth, vec4 viewvecs[2], mat4 proj_mat) */ float calculate_transparent_weight(void) { - float z = linear_zdepth(gl_FragCoord.z, ViewVecs, ProjectionMatrix); + float z = linear_zdepth(gl_FragCoord.z, drw_view.viewvecs, drw_view.winmat); #if 0 /* Eq 10 : Good for surfaces with varying opacity (like particles) */ float a = min(1.0, alpha * 10.0) + 0.01; diff --git a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl index 49e26cd3e0c..afba3a0d784 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl 
+++ b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl @@ -237,7 +237,7 @@ void main() fragColor = vec4(Lscat, Tr); #else vec2 screen_uv = gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0).xy); - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; vec3 volume_center = ModelMatrix[3].xyz; diff --git a/source/blender/draw/engines/workbench/workbench_engine.c b/source/blender/draw/engines/workbench/workbench_engine.c index 9eb35c25bf4..a0459a967f3 100644 --- a/source/blender/draw/engines/workbench/workbench_engine.c +++ b/source/blender/draw/engines/workbench/workbench_engine.c @@ -409,7 +409,7 @@ void workbench_cache_populate(void *ved, Object *ob) return; } - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL, OB_POINTCLOUD)) { + if (ELEM(ob->type, OB_MESH, OB_POINTCLOUD)) { bool use_sculpt_pbvh, use_texpaint_mode, draw_shadow, has_transp_mat = false; eV3DShadingColorType color_type = workbench_color_type_get( wpd, ob, &use_sculpt_pbvh, &use_texpaint_mode, &draw_shadow); diff --git a/source/blender/draw/engines/workbench/workbench_render.c b/source/blender/draw/engines/workbench/workbench_render.c index e5dcf6c5624..931f6a2dc92 100644 --- a/source/blender/draw/engines/workbench/workbench_render.c +++ b/source/blender/draw/engines/workbench/workbench_render.c @@ -17,6 +17,7 @@ #include "ED_view3d.h" +#include "GPU_context.h" #include "GPU_shader.h" #include "DEG_depsgraph.h" @@ -188,6 +189,10 @@ void workbench_render(void *ved, RenderEngine *engine, RenderLayer *render_layer workbench_draw_finish(data); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. */ + GPU_render_step(); + /* Write render output. 
*/ const char *viewname = RE_GetActiveRenderView(engine->re); RenderPass *rp = RE_pass_find_by_name(render_layer, RE_PASSNAME_COMBINED, viewname); diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh index 257f01a5562..890cd588527 100644 --- a/source/blender/draw/intern/DRW_gpu_wrapper.hh +++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh @@ -50,13 +50,13 @@ * * `draw::Framebuffer` * Simple wrapper to #GPUFramebuffer that can be moved. - * */ #include "DRW_render.h" #include "MEM_guardedalloc.h" +#include "draw_manager.h" #include "draw_texture_pool.h" #include "BLI_math_vec_types.hh" @@ -182,7 +182,7 @@ class UniformCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_uniformbuf_free(ubo_); } - void push_update(void) + void push_update() { GPU_uniformbuf_update(ubo_, this->data_); } @@ -227,12 +227,22 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_storagebuf_free(ssbo_); } - void push_update(void) + void push_update() { BLI_assert(device_only == false); GPU_storagebuf_update(ssbo_, this->data_); } + void clear_to_zero() + { + GPU_storagebuf_clear_to_zero(ssbo_); + } + + void read() + { + GPU_storagebuf_read(ssbo_, this->data_); + } + operator GPUStorageBuf *() const { return ssbo_; @@ -319,6 +329,7 @@ class StorageArrayBuffer : public detail::StorageCommon<T, len, device_only> { MEM_freeN(this->data_); } + /* Resize to \a new_size elements. 
*/ void resize(int64_t new_size) { BLI_assert(new_size > 0); @@ -392,10 +403,10 @@ class Texture : NonCopyable { int extent, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, 0, 0, mips, format, data, false, cubemap); + tx_ = create(extent, 0, 0, mip_len, format, data, false, cubemap); } Texture(const char *name, @@ -404,17 +415,20 @@ class Texture : NonCopyable { int layers, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, layers, 0, mips, format, data, true, cubemap); + tx_ = create(extent, layers, 0, mip_len, format, data, true, cubemap); } - Texture( - const char *name, eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int2 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), 0, mips, format, data, false, false); + tx_ = create(UNPACK2(extent), 0, mip_len, format, data, false, false); } Texture(const char *name, @@ -422,17 +436,20 @@ class Texture : NonCopyable { int2 extent, int layers, float *data = nullptr, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), layers, mips, format, data, true, false); + tx_ = create(UNPACK2(extent), layers, mip_len, format, data, true, false); } - Texture( - const char *name, eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int3 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK3(extent), mips, format, data, false, false); + tx_ = create(UNPACK3(extent), mip_len, format, data, false, false); } ~Texture() @@ -467,9 +484,9 @@ class Texture : NonCopyable { * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, 0, 0, mips, format, data, false, false); + return ensure_impl(extent, 0, 0, mip_len, format, data, false, false); } /** @@ -477,18 +494,18 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_1d_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, layers, 0, mips, format, data, true, false); + return ensure_impl(extent, layers, 0, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), 0, mips, format, data, false, false); + return ensure_impl(UNPACK2(extent), 0, mip_len, format, data, false, false); } /** @@ -496,27 +513,27 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_2d_array( - eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), layers, mips, format, data, true, false); + return ensure_impl(UNPACK2(extent), layers, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK3(extent), mips, format, data, false, false); + return ensure_impl(UNPACK3(extent), mip_len, format, data, false, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, 0, mips, format, data, false, true); + return ensure_impl(extent, extent, 0, mip_len, format, data, false, true); } /** @@ -524,9 +541,9 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_cube_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, layers, mips, format, data, false, true); + return ensure_impl(extent, extent, layers, mip_len, format, data, false, true); } /** @@ -555,9 +572,15 @@ class Texture : NonCopyable { return mip_views_[miplvl]; } + int mip_count() const + { + return GPU_texture_mip_count(tx_); + } + /** * Ensure the availability of mipmap views. * Layer views covers all layers of array textures. + * Returns true if the views were (re)created. */ bool ensure_layer_views(bool cube_as_array = false) { @@ -594,42 +617,47 @@ class Texture : NonCopyable { /** * Returns true if the texture has been allocated or acquired from the pool. 
*/ - bool is_valid(void) const + bool is_valid() const { return tx_ != nullptr; } - int width(void) const + int width() const { return GPU_texture_width(tx_); } - int height(void) const + int height() const { return GPU_texture_height(tx_); } - bool depth(void) const + int pixel_count() const + { + return GPU_texture_width(tx_) * GPU_texture_height(tx_); + } + + bool depth() const { return GPU_texture_depth(tx_); } - bool is_stencil(void) const + bool is_stencil() const { return GPU_texture_stencil(tx_); } - bool is_integer(void) const + bool is_integer() const { return GPU_texture_integer(tx_); } - bool is_cube(void) const + bool is_cube() const { return GPU_texture_cube(tx_); } - bool is_array(void) const + bool is_array() const { return GPU_texture_array(tx_); } @@ -708,7 +736,7 @@ class Texture : NonCopyable { bool ensure_impl(int w, int h = 0, int d = 0, - int mips = 1, + int mip_len = 1, eGPUTextureFormat format = GPU_RGBA8, float *data = nullptr, bool layered = false, @@ -721,11 +749,11 @@ class Texture : NonCopyable { int3 size = this->size(); if (size != int3(w, h, d) || GPU_texture_format(tx_) != format || GPU_texture_cube(tx_) != cubemap || GPU_texture_array(tx_) != layered) { - GPU_TEXTURE_FREE_SAFE(tx_); + free(); } } if (tx_ == nullptr) { - tx_ = create(w, h, d, mips, format, data, layered, cubemap); + tx_ = create(w, h, d, mip_len, format, data, layered, cubemap); return true; } return false; @@ -734,87 +762,82 @@ class Texture : NonCopyable { GPUTexture *create(int w, int h, int d, - int mips, + int mip_len, eGPUTextureFormat format, float *data, bool layered, bool cubemap) { if (h == 0) { - return GPU_texture_create_1d(name_, w, mips, format, data); + return GPU_texture_create_1d(name_, w, mip_len, format, data); } else if (cubemap) { if (layered) { - return GPU_texture_create_cube_array(name_, w, d, mips, format, data); + return GPU_texture_create_cube_array(name_, w, d, mip_len, format, data); } else { - return GPU_texture_create_cube(name_, w, 
mips, format, data); + return GPU_texture_create_cube(name_, w, mip_len, format, data); } } else if (d == 0) { if (layered) { - return GPU_texture_create_1d_array(name_, w, h, mips, format, data); + return GPU_texture_create_1d_array(name_, w, h, mip_len, format, data); } else { - return GPU_texture_create_2d(name_, w, h, mips, format, data); + return GPU_texture_create_2d(name_, w, h, mip_len, format, data); } } else { if (layered) { - return GPU_texture_create_2d_array(name_, w, h, d, mips, format, data); + return GPU_texture_create_2d_array(name_, w, h, d, mip_len, format, data); } else { - return GPU_texture_create_3d(name_, w, h, d, mips, format, GPU_DATA_FLOAT, data); + return GPU_texture_create_3d(name_, w, h, d, mip_len, format, GPU_DATA_FLOAT, data); } } } }; class TextureFromPool : public Texture, NonMovable { - private: - GPUTexture *tx_tmp_saved_ = nullptr; - public: TextureFromPool(const char *name = "gpu::Texture") : Texture(name){}; - /* Always use `release()` after rendering and `sync()` in sync phase. */ - void acquire(int2 extent, eGPUTextureFormat format, void *owner_) + /* Always use `release()` after rendering. */ + void acquire(int2 extent, eGPUTextureFormat format) { BLI_assert(this->tx_ == nullptr); - if (this->tx_ != nullptr) { - return; - } - if (tx_tmp_saved_ != nullptr) { - if (GPU_texture_width(tx_tmp_saved_) != extent.x || - GPU_texture_height(tx_tmp_saved_) != extent.y || - GPU_texture_format(tx_tmp_saved_) != format) { - this->tx_tmp_saved_ = nullptr; - } - else { - this->tx_ = tx_tmp_saved_; - return; - } - } - DrawEngineType *owner = (DrawEngineType *)owner_; - this->tx_ = DRW_texture_pool_query_2d(UNPACK2(extent), format, owner); + + this->tx_ = DRW_texture_pool_texture_acquire( + DST.vmempool->texture_pool, UNPACK2(extent), format); } - void release(void) + void release() { /* Allows multiple release. 
*/ - if (this->tx_ != nullptr) { - tx_tmp_saved_ = this->tx_; - this->tx_ = nullptr; + if (this->tx_ == nullptr) { + return; } + DRW_texture_pool_texture_release(DST.vmempool->texture_pool, this->tx_); + this->tx_ = nullptr; } /** - * Clears any reference. Workaround for pool texture not being able to release on demand. - * Needs to be called at during the sync phase. + * Swap the content of the two textures. + * Also change ownership accordingly if needed. */ - void sync(void) + static void swap(TextureFromPool &a, Texture &b) + { + Texture::swap(a, b); + DRW_texture_pool_give_texture_ownership(DST.vmempool->texture_pool, a); + DRW_texture_pool_take_texture_ownership(DST.vmempool->texture_pool, b); + } + static void swap(Texture &a, TextureFromPool &b) { - tx_tmp_saved_ = nullptr; + swap(b, a); + } + static void swap(TextureFromPool &a, TextureFromPool &b) + { + Texture::swap(a, b); } /** Remove methods that are forbidden with this type of textures. */ @@ -832,6 +855,33 @@ class TextureFromPool : public Texture, NonMovable { GPUTexture *stencil_view() = delete; }; +/** + * Dummy type to bind texture as image. + * It is just a GPUTexture in disguise. 
+ */ +class Image { +}; + +static inline Image *as_image(GPUTexture *tex) +{ + return reinterpret_cast<Image *>(tex); +} + +static inline Image **as_image(GPUTexture **tex) +{ + return reinterpret_cast<Image **>(tex); +} + +static inline GPUTexture *as_texture(Image *img) +{ + return reinterpret_cast<GPUTexture *>(img); +} + +static inline GPUTexture **as_texture(Image **img) +{ + return reinterpret_cast<GPUTexture **>(img); +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -901,45 +951,47 @@ class Framebuffer : NonCopyable { template<typename T, int64_t len> class SwapChain { private: + BLI_STATIC_ASSERT(len > 1, "A swap-chain needs more than 1 unit in length."); std::array<T, len> chain_; - int64_t index_ = 0; public: void swap() { - index_ = (index_ + 1) % len; + for (auto i : IndexRange(len - 1)) { + T::swap(chain_[i], chain_[(i + 1) % len]); + } } T ¤t() { - return chain_[index_]; + return chain_[0]; } T &previous() { /* Avoid modulo operation with negative numbers. */ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } T &next() { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } const T ¤t() const { - return chain_[index_]; + return chain_[0]; } const T &previous() const { /* Avoid modulo operation with negative numbers. 
*/ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } const T &next() const { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } }; diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index fa4a1d93d3e..b49203d85f6 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -41,6 +41,7 @@ #include "draw_debug.h" #include "draw_manager_profiling.h" +#include "draw_state.h" #include "draw_view_data.h" #include "MEM_guardedalloc.h" @@ -206,6 +207,10 @@ struct GPUShader *DRW_shader_create_with_lib_ex(const char *vert, const char *lib, const char *defines, const char *name); +struct GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name); struct GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, @@ -288,83 +293,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib); /* Batches */ -/** - * DRWState is a bit-mask that stores the current render state and the desired render state. Based - * on the differences the minimum state changes can be invoked to setup the desired render state. - * - * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive - * therefore they aren't ordered as a bit mask. - */ -typedef enum { - /** To be used for compute passes. */ - DRW_STATE_NO_DRAW = 0, - /** Write mask */ - DRW_STATE_WRITE_DEPTH = (1 << 0), - DRW_STATE_WRITE_COLOR = (1 << 1), - /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ - DRW_STATE_WRITE_STENCIL = (1 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), - /** Depth test. 
These options are mutual exclusive and packed into 3 bits */ - DRW_STATE_DEPTH_ALWAYS = (1 << 4), - DRW_STATE_DEPTH_LESS = (2 << 4), - DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), - DRW_STATE_DEPTH_EQUAL = (4 << 4), - DRW_STATE_DEPTH_GREATER = (5 << 4), - DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), - /** Culling test */ - DRW_STATE_CULL_BACK = (1 << 7), - DRW_STATE_CULL_FRONT = (1 << 8), - /** Stencil test. These options are mutually exclusive and packed into 2 bits. */ - DRW_STATE_STENCIL_ALWAYS = (1 << 9), - DRW_STATE_STENCIL_EQUAL = (2 << 9), - DRW_STATE_STENCIL_NEQUAL = (3 << 9), - - /** Blend state. These options are mutual exclusive and packed into 4 bits */ - DRW_STATE_BLEND_ADD = (1 << 11), - /** Same as additive but let alpha accumulate without pre-multiply. */ - DRW_STATE_BLEND_ADD_FULL = (2 << 11), - /** Standard alpha blending. */ - DRW_STATE_BLEND_ALPHA = (3 << 11), - /** Use that if color is already pre-multiply by alpha. */ - DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), - DRW_STATE_BLEND_BACKGROUND = (5 << 11), - DRW_STATE_BLEND_OIT = (6 << 11), - DRW_STATE_BLEND_MUL = (7 << 11), - DRW_STATE_BLEND_SUB = (8 << 11), - /** Use dual source blending. WARNING: Only one color buffer allowed. */ - DRW_STATE_BLEND_CUSTOM = (9 << 11), - DRW_STATE_LOGIC_INVERT = (10 << 11), - DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), - - DRW_STATE_IN_FRONT_SELECT = (1 << 27), - DRW_STATE_SHADOW_OFFSET = (1 << 28), - DRW_STATE_CLIP_PLANES = (1 << 29), - DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), - /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ - DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), -} DRWState; - -ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); - -#define DRW_STATE_DEFAULT \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) -#define DRW_STATE_BLEND_ENABLED \ - (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ - DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ - DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) -#define DRW_STATE_RASTERIZER_ENABLED \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ - DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) -#define DRW_STATE_DEPTH_TEST_ENABLED \ - (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ - DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) -#define DRW_STATE_STENCIL_TEST_ENABLED \ - (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) -#define DRW_STATE_WRITE_STENCIL_ENABLED \ - (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) - typedef enum { DRW_ATTR_INT, DRW_ATTR_FLOAT, @@ -409,7 +337,7 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, void *user_data); /** - * If ob is NULL, unit modelmatrix is assumed and culling is bypassed. + * If ob is NULL, unit model-matrix is assumed and culling is bypassed. */ #define DRW_shgroup_call(shgroup, geom, ob) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, NULL) @@ -420,8 +348,8 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, #define DRW_shgroup_call_obmat(shgroup, geom, obmat) \ DRW_shgroup_call_ex(shgroup, NULL, obmat, geom, false, NULL) -/* TODO(fclem): remove this when we have DRWView */ -/* user_data is used by DRWCallVisibilityFn defined in DRWView. 
*/ +/* TODO(fclem): remove this when we have #DRWView */ +/* user_data is used by #DRWCallVisibilityFn defined in #DRWView. */ #define DRW_shgroup_call_with_callback(shgroup, geom, ob, user_data) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, user_data) @@ -454,6 +382,10 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * void DRW_shgroup_call_procedural_points(DRWShadingGroup *sh, Object *ob, uint point_count); void DRW_shgroup_call_procedural_lines(DRWShadingGroup *sh, Object *ob, uint line_count); void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *sh, Object *ob, uint tri_count); +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf); /** * \warning Only use with Shaders that have `IN_PLACE_INSTANCES` defined. * TODO: Should be removed. @@ -639,10 +571,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo, __FILE__, __LINE__) # define DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo, __FILE__, __LINE__) #else # define DRW_shgroup_vertex_buffer(shgroup, name, vert) \ DRW_shgroup_vertex_buffer_ex(shgroup, name, vert) @@ -652,10 +584,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo) # define 
DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo) #endif bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup); @@ -791,7 +723,7 @@ bool DRW_culling_box_test(const DRWView *view, const BoundBox *bbox); bool DRW_culling_plane_test(const DRWView *view, const float plane[4]); /** * Return True if the given box intersect the current view frustum. - * This function will have to be replaced when world space bb per objects is implemented. + * This function will have to be replaced when world space bounding-box per objects is implemented. */ bool DRW_culling_min_max_test(const DRWView *view, float obmat[4][4], float min[3], float max[3]); @@ -887,7 +819,6 @@ bool DRW_object_is_in_edit_mode(const struct Object *ob); * we are rendering or drawing in the viewport. 
*/ int DRW_object_visibility_in_active_context(const struct Object *ob); -bool DRW_object_is_flat_normal(const struct Object *ob); bool DRW_object_use_hide_faces(const struct Object *ob); bool DRW_object_is_visible_psys_in_active_context(const struct Object *object, @@ -981,7 +912,7 @@ typedef struct DRWContextState { struct ViewLayer *view_layer; /* 'CTX_data_view_layer(C)' */ /* Use 'object_edit' for edit-mode */ - struct Object *obact; /* 'OBACT' */ + struct Object *obact; struct RenderEngineType *engine_type; diff --git a/source/blender/draw/intern/draw_attributes.cc b/source/blender/draw/intern/draw_attributes.cc index 8fb4210901f..011d72e9e8f 100644 --- a/source/blender/draw/intern/draw_attributes.cc +++ b/source/blender/draw/intern/draw_attributes.cc @@ -65,9 +65,10 @@ bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b) } DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, - eAttrDomain domain) + const char *name, + const eCustomDataType type, + const int layer_index, + const eAttrDomain domain) { if (attrs->num_requests >= GPU_MAX_ATTR) { return nullptr; @@ -75,7 +76,8 @@ DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, DRW_AttributeRequest *req = &attrs->requests[attrs->num_requests]; req->cd_type = type; - req->layer_index = layer; + BLI_strncpy(req->attribute_name, name, sizeof(req->attribute_name)); + req->layer_index = layer_index; req->domain = domain; attrs->num_requests += 1; return req; @@ -86,7 +88,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, int *r_layer_index, eCustomDataType *r_type) { - const eCustomDataType possible_attribute_types[7] = { + const eCustomDataType possible_attribute_types[8] = { CD_PROP_BOOL, CD_PROP_INT8, CD_PROP_INT32, @@ -94,6 +96,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, CD_PROP_FLOAT2, CD_PROP_FLOAT3, CD_PROP_COLOR, + CD_PROP_BYTE_COLOR, }; for (int i = 0; i < 
ARRAY_SIZE(possible_attribute_types); i++) { diff --git a/source/blender/draw/intern/draw_attributes.h b/source/blender/draw/intern/draw_attributes.h index 4f82f3b94e9..b577c6c4162 100644 --- a/source/blender/draw/intern/draw_attributes.h +++ b/source/blender/draw/intern/draw_attributes.h @@ -46,8 +46,9 @@ void drw_attributes_merge(DRW_Attributes *dst, bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b); DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, + const char *name, + eCustomDataType data_type, + int layer_index, eAttrDomain domain); bool drw_custom_data_match_attribute(const CustomData *custom_data, diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index f846251c66b..6537490c06c 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -90,6 +90,7 @@ static struct DRWShapeCache { GPUBatch *drw_procedural_verts; GPUBatch *drw_procedural_lines; GPUBatch *drw_procedural_tris; + GPUBatch *drw_procedural_tri_strips; GPUBatch *drw_cursor; GPUBatch *drw_cursor_only_circle; GPUBatch *drw_fullscreen_quad; @@ -208,6 +209,21 @@ GPUBatch *drw_cache_procedural_triangles_get(void) return SHC.drw_procedural_tris; } +GPUBatch *drw_cache_procedural_triangle_strips_get() +{ + if (!SHC.drw_procedural_tri_strips) { + /* TODO(fclem): get rid of this dummy VBO. 
*/ + GPUVertFormat format = {0}; + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(vbo, 1); + + SHC.drw_procedural_tri_strips = GPU_batch_create_ex( + GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO); + } + return SHC.drw_procedural_tri_strips; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -764,6 +780,39 @@ GPUBatch *DRW_cache_normal_arrow_get(void) return SHC.drw_normal_arrow; } +void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) +{ + static GPUVertFormat format = {0}; + static struct { + uint wd; + } attr_id; + if (format.attr_len == 0) { + /* initialize vertex format */ + if (!GPU_crappy_amd_driver()) { + /* Some AMD drivers strangely crash with a vbo with this format. */ + attr_id.wd = GPU_vertformat_attr_add( + &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + } + else { + attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + } + + GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_data_alloc(vbo, vert_len); + + if (GPU_vertbuf_get_format(vbo)->stride == 1) { + memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); + } + else { + GPUVertBufRaw wd_step; + GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); + for (int i = 0; i < vert_len; i++) { + *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; + } + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -777,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void) { if (SHC.drw_gpencil_dummy_quad == NULL) { GPUVertFormat format = {0}; - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT); + /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 4); @@ -802,7 +852,6 @@ GPUBatch *DRW_cache_object_all_edges_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_all_edges_get(ob); - /* TODO: should match #DRW_cache_object_surface_get. */ default: return NULL; @@ -814,20 +863,6 @@ GPUBatch *DRW_cache_object_edge_detection_get(Object *ob, bool *r_is_manifold) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_edge_detection_get(ob, r_is_manifold); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_edge_detection_get(ob, r_is_manifold); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -838,23 +873,12 @@ GPUBatch *DRW_cache_object_face_wireframe_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_face_wireframe_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_face_wireframe_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_pointcloud_batch_cache_get_dots(ob); case OB_VOLUME: return DRW_cache_volume_face_wireframe_get(ob); - case OB_GPENCIL: { + case OB_GPENCIL: return DRW_cache_gpencil_face_wireframe_get(ob); - } default: return NULL; } @@ -865,20 +889,6 @@ GPUBatch *DRW_cache_object_loose_edges_get(struct Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_loose_edges_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return NULL; - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -889,20 +899,8 @@ GPUBatch 
*DRW_cache_object_surface_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_get(ob); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -916,18 +914,6 @@ GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob) switch (type) { case OB_MESH: return DRW_mesh_batch_cache_pos_vertbuf_get((me != NULL) ? me : ob->data); - case OB_CURVES_LEGACY: - case OB_SURF: - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_mball_batch_cache_pos_vertbuf_get(ob); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -952,8 +938,6 @@ int DRW_cache_object_material_count_get(struct Object *ob) case OB_SURF: case OB_FONT: return DRW_curve_material_count_get(ob->data); - case OB_MBALL: - return DRW_metaball_material_count_get(ob->data); case OB_CURVES: return DRW_curves_material_count_get(ob->data); case OB_POINTCLOUD: @@ -975,20 +959,8 @@ GPUBatch **DRW_cache_object_surface_material_get(struct Object *ob, switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -2956,39 +2928,6 @@ GPUBatch *DRW_cache_curve_vert_overlay_get(Object *ob) /** \} */ /* -------------------------------------------------------------------- */ -/** \name MetaBall - * \{ */ - -GPUBatch 
*DRW_cache_mball_surface_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_triangles_with_normals(ob); -} - -GPUBatch *DRW_cache_mball_edge_detection_get(Object *ob, bool *r_is_manifold) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_edge_detection(ob, r_is_manifold); -} - -GPUBatch *DRW_cache_mball_face_wireframe_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_wireframes_face(ob); -} - -GPUBatch **DRW_cache_mball_surface_shaded_get(Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len) -{ - BLI_assert(ob->type == OB_MBALL); - MetaBall *mb = ob->data; - return DRW_metaball_batch_cache_get_surface_shaded(ob, mb, gpumat_array, gpumat_array_len); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ /** \name Font * \{ */ @@ -3306,9 +3245,6 @@ void drw_batch_cache_validate(Object *ob) case OB_SURF: DRW_curve_batch_cache_validate((Curve *)ob->data); break; - case OB_MBALL: - DRW_mball_batch_cache_validate((MetaBall *)ob->data); - break; case OB_LATTICE: DRW_lattice_batch_cache_validate((Lattice *)ob->data); break; diff --git a/source/blender/draw/intern/draw_cache.h b/source/blender/draw/intern/draw_cache.h index a107eb7c75c..4e8788ada08 100644 --- a/source/blender/draw/intern/draw_cache.h +++ b/source/blender/draw/intern/draw_cache.h @@ -213,15 +213,6 @@ struct GPUBatch *DRW_cache_particles_get_edit_tip_points(struct Object *object, struct PTCacheEdit *edit); struct GPUBatch *DRW_cache_particles_get_prim(int type); -/* Metaball */ - -struct GPUBatch *DRW_cache_mball_surface_get(struct Object *ob); -struct GPUBatch **DRW_cache_mball_surface_shaded_get(struct Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_cache_mball_face_wireframe_get(struct Object *ob); -struct GPUBatch *DRW_cache_mball_edge_detection_get(struct Object *ob, bool *r_is_manifold); 
- /* Curves */ struct GPUBatch *DRW_cache_curves_surface_get(struct Object *ob); diff --git a/source/blender/draw/intern/draw_cache_extract.hh b/source/blender/draw/intern/draw_cache_extract.hh index c7127d169e1..203da22406c 100644 --- a/source/blender/draw/intern/draw_cache_extract.hh +++ b/source/blender/draw/intern/draw_cache_extract.hh @@ -55,7 +55,6 @@ enum { struct DRW_MeshCDMask { uint32_t uv : 8; uint32_t tan : 8; - uint32_t vcol : 8; uint32_t orco : 1; uint32_t tan_orco : 1; uint32_t sculpt_overlays : 1; @@ -111,7 +110,6 @@ struct MeshBufferList { GPUVertBuf *weights; /* extend */ GPUVertBuf *uv; GPUVertBuf *tan; - GPUVertBuf *vcol; GPUVertBuf *sculpt_data; GPUVertBuf *orco; /* Only for edit mode. */ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 00005fd7b4c..b1d1631cb6d 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -155,7 +155,7 @@ struct ExtractTaskData { bool use_threading = false; ExtractTaskData(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas *extractors, MeshBufferList *mbuflist, const bool use_threading) @@ -193,7 +193,7 @@ static void extract_task_data_free(void *data) * \{ */ BLI_INLINE void extract_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas &extractors, MeshBufferList *mbuflist, void *data_stack) @@ -209,7 +209,7 @@ BLI_INLINE void extract_init(const MeshRenderData *mr, } BLI_INLINE void extract_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, const ExtractorRunDatas &extractors, void *data_stack) { @@ -619,7 +619,6 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, EXTRACT_ADD_REQUESTED(vbo, lnor); EXTRACT_ADD_REQUESTED(vbo, uv); EXTRACT_ADD_REQUESTED(vbo, tan); - EXTRACT_ADD_REQUESTED(vbo, vcol); 
EXTRACT_ADD_REQUESTED(vbo, sculpt_data); EXTRACT_ADD_REQUESTED(vbo, orco); EXTRACT_ADD_REQUESTED(vbo, edge_fac); @@ -848,7 +847,6 @@ void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle); EXTRACT_ADD_REQUESTED(ibo, lines_paint_mask); EXTRACT_ADD_REQUESTED(ibo, lines_adjacency); - EXTRACT_ADD_REQUESTED(vbo, vcol); EXTRACT_ADD_REQUESTED(vbo, weights); EXTRACT_ADD_REQUESTED(vbo, sculpt_data); diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc index baea0c7b646..eea19cbebf3 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc @@ -14,6 +14,7 @@ #include "BLI_math.h" #include "BLI_task.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_editmesh_cache.h" #include "BKE_mesh.h" @@ -228,10 +229,10 @@ static void mesh_render_data_polys_sorted_build(MeshRenderData *mr, MeshBufferCa } } else { - const MPoly *mp = &mr->mpoly[0]; - for (int i = 0; i < mr->poly_len; i++, mp++) { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - const int mat = min_ii(mp->mat_nr, mat_last); + for (int i = 0; i < mr->poly_len; i++) { + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[i])) { + const MPoly *mp = &mr->mpoly[i]; + const int mat = min_ii(mr->material_indices ? mr->material_indices[i] : 0, mat_last); tri_first_index[i] = mat_tri_offs[mat]; mat_tri_offs[mat] += mp->totloop - 2; } @@ -269,8 +270,8 @@ static void mesh_render_data_mat_tri_len_mesh_range_fn(void *__restrict userdata int *mat_tri_len = static_cast<int *>(tls->userdata_chunk); const MPoly *mp = &mr->mpoly[iter]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - int mat = min_ii(mp->mat_nr, mr->mat_len - 1); + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[iter])) { + int mat = min_ii(mr->material_indices ? 
mr->material_indices[iter] : 0, mr->mat_len - 1); mat_tri_len[mat] += mp->totloop - 2; } } @@ -332,15 +333,15 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, if (mr->extract_type != MR_EXTRACT_BMESH) { /* Mesh */ if ((iter_type & MR_ITER_LOOPTRI) || (data_flag & MR_DATA_LOOPTRI)) { - /* NOTE(campbell): It's possible to skip allocating tessellation, + /* NOTE(@campbellbarton): It's possible to skip allocating tessellation, * the tessellation can be calculated as part of the iterator, see: P2188. * The overall advantage is small (around 1%), so keep this as-is. */ mr->mlooptri = static_cast<MLoopTri *>( MEM_mallocN(sizeof(*mr->mlooptri) * mr->tri_len, "MR_DATATYPE_LOOPTRI")); if (mr->poly_normals != nullptr) { - BKE_mesh_recalc_looptri_with_normals(me->mloop, - me->mpoly, - me->mvert, + BKE_mesh_recalc_looptri_with_normals(mr->mloop, + mr->mpoly, + mr->mvert, me->totloop, me->totpoly, mr->mlooptri, @@ -348,7 +349,7 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, } else { BKE_mesh_recalc_looptri( - me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri); + mr->mloop, mr->mpoly, mr->mvert, me->totloop, me->totpoly, mr->mlooptri); } } } @@ -378,15 +379,15 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ MEM_mallocN(sizeof(*mr->loop_normals) * mr->loop_len, __func__)); short(*clnors)[2] = static_cast<short(*)[2]>( CustomData_get_layer(&mr->me->ldata, CD_CUSTOMLOOPNORMAL)); - BKE_mesh_normals_loop_split(mr->me->mvert, + BKE_mesh_normals_loop_split(mr->mvert, mr->vert_normals, mr->vert_len, - mr->me->medge, + mr->medge, mr->edge_len, - mr->me->mloop, + mr->mloop, mr->loop_normals, mr->loop_len, - mr->me->mpoly, + mr->mpoly, mr->poly_normals, mr->poly_len, is_auto_smooth, @@ -431,6 +432,30 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ } } +static void retrieve_active_attribute_names(MeshRenderData &mr, + const Object &object, + const Mesh &mesh) +{ + const 
Mesh *mesh_final = editmesh_final_or_this(&object, &mesh); + const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(mesh_final); + const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(mesh_final); + + /* Necessary because which attributes are active/default is stored in #CustomData. */ + Mesh me_query = blender::dna::shallow_zero_initialize(); + BKE_id_attribute_copy_domains_temp( + ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); + + mr.active_color_name = nullptr; + mr.default_color_name = nullptr; + + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + mr.active_color_name = active->name; + } + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + mr.default_color_name = render->name; + } +} + MeshRenderData *mesh_render_data_create(Object *object, Mesh *me, const bool is_editmode, @@ -470,17 +495,6 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->bm_poly_centers = mr->edit_data->polyCos; } - /* A subdivision wrapper may be created in edit mode when X-ray is turned on to ensure that the - * topology seen by the user matches the one used for the selection routines. This wrapper - * seemingly takes precedence over the MDATA one, however the mesh we use for rendering is not - * the subdivided one, but the one where the MDATA wrapper would have been added. So consider - * the subdivision wrapper as well for the `has_mdata` case. 
*/ - bool has_mdata = is_mode_active && ELEM(mr->me->runtime.wrapper_type, - ME_WRAPPER_TYPE_MDATA, - ME_WRAPPER_TYPE_SUBD); - bool use_mapped = is_mode_active && - (has_mdata && !do_uvedit && mr->me && !mr->me->runtime.is_original); - int bm_ensure_types = BM_VERT | BM_EDGE | BM_LOOP | BM_FACE; BM_mesh_elem_index_ensure(mr->bm, bm_ensure_types); @@ -499,43 +513,51 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->freestyle_face_ofs = CustomData_get_offset(&mr->bm->pdata, CD_FREESTYLE_FACE); #endif - if (use_mapped) { - mr->v_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); - mr->e_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); - mr->p_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); + /* Use bmesh directly when the object is in edit mode unchanged by any modifiers. + * For non-final UVs, always use original bmesh since the UV editor does not support + * using the cage mesh with deformed coordinates. */ + if ((is_mode_active && mr->me->runtime.is_original_bmesh && + mr->me->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH) || + (do_uvedit && !do_final)) { + mr->extract_type = MR_EXTRACT_BMESH; } - - mr->extract_type = use_mapped ? MR_EXTRACT_MAPPED : MR_EXTRACT_BMESH; - - /* Seems like the mesh_eval_final do not have the right origin indices. - * Force not mapped in this case. */ - if (has_mdata && do_final && editmesh_eval_final != editmesh_eval_cage) { - // mr->edit_bmesh = nullptr; + else { mr->extract_type = MR_EXTRACT_MESH; + + /* Use mapping from final to original mesh when the object is in edit mode. 
*/ + if (is_mode_active && do_final) { + mr->v_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); + mr->e_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); + mr->p_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + } + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } } else { mr->me = me; mr->edit_bmesh = nullptr; + mr->extract_type = MR_EXTRACT_MESH; - bool use_mapped = is_paint_mode && mr->me && !mr->me->runtime.is_original; - if (use_mapped) { + if (is_paint_mode && mr->me) { mr->v_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); } - - mr->extract_type = use_mapped ? 
MR_EXTRACT_MAPPED : MR_EXTRACT_MESH; + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } if (mr->extract_type != MR_EXTRACT_BMESH) { @@ -546,14 +568,24 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->poly_len = mr->me->totpoly; mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); - mr->mvert = static_cast<MVert *>(CustomData_get_layer(&mr->me->vdata, CD_MVERT)); - mr->medge = static_cast<MEdge *>(CustomData_get_layer(&mr->me->edata, CD_MEDGE)); - mr->mloop = static_cast<MLoop *>(CustomData_get_layer(&mr->me->ldata, CD_MLOOP)); - mr->mpoly = static_cast<MPoly *>(CustomData_get_layer(&mr->me->pdata, CD_MPOLY)); + mr->mvert = BKE_mesh_verts(mr->me); + mr->medge = BKE_mesh_edges(mr->me); + mr->mpoly = BKE_mesh_polys(mr->me); + mr->mloop = BKE_mesh_loops(mr->me); mr->v_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + + mr->material_indices = static_cast<const int *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_INT32, "material_index")); + + mr->hide_vert = static_cast<const bool *>( + CustomData_get_layer_named(&me->vdata, CD_PROP_BOOL, ".hide_vert")); + mr->hide_edge = static_cast<const bool *>( + CustomData_get_layer_named(&me->edata, CD_PROP_BOOL, ".hide_edge")); + mr->hide_poly = static_cast<const bool *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_BOOL, ".hide_poly")); } else { /* #BMesh */ @@ -566,6 +598,8 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); } + retrieve_active_attribute_names(*mr, *object, *me); + return mr; } diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h index 4fa5813d476..7f7d0a7613f 100644 --- 
a/source/blender/draw/intern/draw_cache_impl.h +++ b/source/blender/draw/intern/draw_cache_impl.h @@ -36,10 +36,6 @@ extern "C" { /** \name Expose via BKE callbacks * \{ */ -void DRW_mball_batch_cache_dirty_tag(struct MetaBall *mb, int mode); -void DRW_mball_batch_cache_validate(struct MetaBall *mb); -void DRW_mball_batch_cache_free(struct MetaBall *mb); - void DRW_curve_batch_cache_dirty_tag(struct Curve *cu, int mode); void DRW_curve_batch_cache_validate(struct Curve *cu); void DRW_curve_batch_cache_free(struct Curve *cu); @@ -111,39 +107,6 @@ struct GPUBatch *DRW_curve_batch_cache_get_edit_verts(struct Curve *cu); /** \} */ /* -------------------------------------------------------------------- */ -/** \name Metaball - * \{ */ - -int DRW_metaball_material_count_get(struct MetaBall *mb); - -struct GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(struct Object *ob); -struct GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(struct Object *ob, - struct MetaBall *mb, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(struct Object *ob); -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name DispList - * \{ */ - -void DRW_displist_vertbuf_create_pos_and_nor(struct ListBase *lb, - struct GPUVertBuf *vbo, - const struct Scene *scene); -void DRW_displist_vertbuf_create_wiredata(struct ListBase *lb, struct GPUVertBuf *vbo); -void DRW_displist_indexbuf_create_lines_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_triangles_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- 
*/ /** \name Lattice * \{ */ @@ -161,6 +124,16 @@ struct GPUBatch *DRW_lattice_batch_cache_get_edit_verts(struct Lattice *lt); int DRW_curves_material_count_get(struct Curves *curves); +/** + * Provide GPU access to a specific evaluated attribute on curves. + * + * \return A pointer to location where the texture will be + * stored, which will be filled by #DRW_shgroup_curves_create_sub. + */ +struct GPUTexture **DRW_curves_texture_for_evaluated_attribute(struct Curves *curves, + const char *name, + bool *r_is_point_domain); + struct GPUBatch *DRW_curves_batch_cache_get_edit_points(struct Curves *curves); void DRW_curves_batch_cache_create_requested(struct Object *ob); @@ -299,7 +272,6 @@ struct GPUBatch *DRW_mesh_batch_cache_get_edit_mesh_analysis(struct Mesh *me); * \{ */ struct GPUVertBuf *DRW_mesh_batch_cache_pos_vertbuf_get(struct Mesh *me); -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(struct Object *ob); int DRW_mesh_material_count_get(const struct Object *object, const struct Mesh *me); diff --git a/source/blender/draw/intern/draw_cache_impl_curve.cc b/source/blender/draw/intern/draw_cache_impl_curve.cc index ebcdabe4942..695c348d8e2 100644 --- a/source/blender/draw/intern/draw_cache_impl_curve.cc +++ b/source/blender/draw/intern/draw_cache_impl_curve.cc @@ -108,7 +108,7 @@ static void curve_eval_render_wire_verts_edges_len_get(const blender::bke::Curve const blender::VArray<bool> cyclic = curves.cyclic(); for (const int i : curves.curves_range()) { const IndexRange points = curves.evaluated_points_for_curve(i); - *r_edge_len += blender::bke::curves::curve_segment_num(points.size(), cyclic[i]); + *r_edge_len += blender::bke::curves::segments_num(points.size(), cyclic[i]); } } diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc index 68ca1153c96..3bca17d9c56 100644 --- a/source/blender/draw/intern/draw_cache_impl_curves.cc +++ 
b/source/blender/draw/intern/draw_cache_impl_curves.cc @@ -75,13 +75,14 @@ static void curves_batch_cache_init(Curves &curves) if (!cache) { cache = MEM_cnew<CurvesBatchCache>(__func__); - BLI_mutex_init(&cache->render_mutex); curves.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } + BLI_mutex_init(&cache->render_mutex); + cache->is_dirty = false; } @@ -258,7 +259,7 @@ static void curves_batch_cache_fill_segments_proc_pos( } } -static void curves_batch_cache_ensure_procedural_pos(Curves &curves, +static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, CurvesEvalCache &cache, GPUMaterial *gpu_material) { @@ -268,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "pos"); - cache.proc_point_buf = GPU_vertbuf_create_with_format(&format); + cache.proc_point_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len); MutableSpan posTime_data{ @@ -278,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPUVertFormat length_format = {0}; GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache.proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len); MutableSpan hairLength_data{ @@ -311,12 +314,15 @@ void drw_curves_get_attribute_sampler_name(const char *layer_name, char r_sample BLI_snprintf(r_sampler_name, 32, "a%s", attr_safe_name); } -static void curves_batch_cache_ensure_procedural_final_attr( - CurvesEvalCache &cache, GPUVertFormat *format, int subdiv, int index, const char *name) +static void 
curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cache, + const GPUVertFormat *format, + const int subdiv, + const int index, + const char *name) { CurvesEvalFinalCache &final_cache = cache.final[subdiv]; - final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format, - GPU_USAGE_DEVICE_ONLY); + final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -333,8 +339,8 @@ static void curves_batch_cache_ensure_procedural_final_attr( static void curves_batch_ensure_attribute(const Curves &curves, CurvesEvalCache &cache, const DRW_AttributeRequest &request, - int subdiv, - int index) + const int subdiv, + const int index) { GPU_VERTBUF_DISCARD_SAFE(cache.proc_attributes_buf[index]); DRW_TEXTURE_FREE_SAFE(cache.proc_attributes_tex[index]); @@ -347,27 +353,28 @@ static void curves_batch_ensure_attribute(const Curves &curves, /* All attributes use vec4, see comment below. */ GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format); + cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index]; GPU_vertbuf_data_alloc(attr_vbo, request.domain == ATTR_DOMAIN_POINT ? 
curves.geometry.point_num : curves.geometry.curve_num); - CurveComponent component; - component.replace(const_cast<Curves *>(&curves), GeometryOwnershipType::ReadOnly); + const blender::bke::AttributeAccessor attributes = + blender::bke::CurvesGeometry::wrap(curves.geometry).attributes(); /* TODO(@kevindietrich): float4 is used for scalar attributes as the implicit conversion done * by OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following * the Blender convention, it should be `vec4(s, s, s, 1)`. This could be resolved using a * similar texture state swizzle to map the attribute correctly as for volume attributes, so we * can control the conversion ourselves. */ - blender::VArray<ColorGeometry4f> attribute = component.attribute_get_for_read<ColorGeometry4f>( + blender::VArray<ColorGeometry4f> attribute = attributes.lookup_or_default<ColorGeometry4f>( request.attribute_name, request.domain, {0.0f, 0.0f, 0.0f, 1.0f}); MutableSpan<ColorGeometry4f> vbo_span{ static_cast<ColorGeometry4f *>(GPU_vertbuf_get_data(attr_vbo)), - component.attribute_domain_num(request.domain)}; + attributes.domain_size(request.domain)}; attribute.materialize(vbo_span); @@ -393,10 +400,10 @@ static void curves_batch_cache_fill_strands_data(const Curves &curves_id, curves_id.geometry); for (const int i : IndexRange(curves.curves_num())) { - const IndexRange curve_range = curves.points_for_curve(i); + const IndexRange points = curves.points_for_curve(i); - *(uint *)GPU_vertbuf_raw_step(&data_step) = curve_range.start(); - *(ushort *)GPU_vertbuf_raw_step(&seg_step) = curve_range.size() - 1; + *(uint *)GPU_vertbuf_raw_step(&data_step) = points.start(); + *(ushort *)GPU_vertbuf_raw_step(&seg_step) = points.size() - 1; } } @@ -412,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves, uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT); /* Curve Data. 
*/ - cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step); - cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step); @@ -437,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -509,58 +519,41 @@ static bool curves_ensure_attributes(const Curves &curves, ThreadMutex *render_mutex = &cache.render_mutex; const CustomData *cd_curve = &curves.geometry.curve_data; const CustomData *cd_point = &curves.geometry.point_data; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; - DRW_Attributes attrs_needed; - drw_attributes_clear(&attrs_needed); - ListBase gpu_attrs = GPU_material_attributes(gpu_material); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - - int layer_index; - eCustomDataType type; - eAttrDomain domain; - if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_CURVE; - } - else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else { - continue; - } - - switch (type) { - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: - case CD_PROP_FLOAT3: - case CD_PROP_COLOR: { - DRW_AttributeRequest *request = drw_attributes_add_request( - &attrs_needed, type, layer_index, domain); - if (request) { - BLI_strncpy(request->attribute_name, name, sizeof(request->attribute_name)); - } - - break; + if (gpu_material) { + DRW_Attributes attrs_needed; + drw_attributes_clear(&attrs_needed); + ListBase gpu_attrs = GPU_material_attributes(gpu_material); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + + int layer_index; + eCustomDataType type; + eAttrDomain domain; + if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_CURVE; + } + else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_POINT; + } + else { + continue; } - default: - break; - } - } - CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + drw_attributes_add_request(&attrs_needed, 
name, type, layer_index, domain); + } - if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { - /* Some new attributes have been added, free all and start over. */ - for (const int i : IndexRange(GPU_MAX_ATTR)) { - GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); - DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { + /* Some new attributes have been added, free all and start over. */ + for (const int i : IndexRange(GPU_MAX_ATTR)) { + GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); + DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + } + drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); + drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); bool need_tf_update = false; @@ -581,16 +574,15 @@ static bool curves_ensure_attributes(const Curves &curves, return need_tf_update; } -bool curves_ensure_procedural_data(Object *object, +bool curves_ensure_procedural_data(Curves *curves, CurvesEvalCache **r_hair_cache, GPUMaterial *gpu_material, const int subdiv, const int thickness_res) { bool need_ft_update = false; - Curves &curves = *static_cast<Curves *>(object->data); - CurvesBatchCache &cache = curves_batch_cache_get(curves); + CurvesBatchCache &cache = curves_batch_cache_get(*curves); *r_hair_cache = &cache.curves_cache; const int steps = 3; /* TODO: don't hard-code? */ @@ -598,14 +590,14 @@ bool curves_ensure_procedural_data(Object *object, /* Refreshed on combing and simulation. 
*/ if ((*r_hair_cache)->proc_point_buf == nullptr) { - ensure_seg_pt_count(curves, cache.curves_cache); - curves_batch_cache_ensure_procedural_pos(curves, cache.curves_cache, gpu_material); + ensure_seg_pt_count(*curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_pos(*curves, cache.curves_cache, gpu_material); need_ft_update = true; } /* Refreshed if active layer or custom data changes. */ if ((*r_hair_cache)->strand_tex == nullptr) { - curves_batch_cache_ensure_procedural_strand_data(curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_strand_data(*curves, cache.curves_cache); } /* Refreshed only on subdiv count change. */ @@ -615,12 +607,10 @@ bool curves_ensure_procedural_data(Object *object, } if ((*r_hair_cache)->final[subdiv].proc_hairs[thickness_res - 1] == nullptr) { curves_batch_cache_ensure_procedural_indices( - curves, cache.curves_cache, thickness_res, subdiv); + *curves, cache.curves_cache, thickness_res, subdiv); } - if (gpu_material) { - need_ft_update |= curves_ensure_attributes(curves, cache, gpu_material, subdiv); - } + need_ft_update |= curves_ensure_attributes(*curves, cache, gpu_material, subdiv); return need_ft_update; } @@ -636,6 +626,70 @@ GPUBatch *DRW_curves_batch_cache_get_edit_points(Curves *curves) return DRW_batch_request(&cache.edit_points); } +static void request_attribute(Curves &curves, const char *name) +{ + CurvesBatchCache &cache = curves_batch_cache_get(curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + DRW_Attributes attributes{}; + + blender::bke::CurvesGeometry &curves_geometry = blender::bke::CurvesGeometry::wrap( + curves.geometry); + std::optional<blender::bke::AttributeMetaData> meta_data = + curves_geometry.attributes().lookup_meta_data(name); + if (!meta_data) { + return; + } + const eAttrDomain domain = 
meta_data->domain; + const eCustomDataType type = meta_data->data_type; + const CustomData &custom_data = domain == ATTR_DOMAIN_POINT ? curves.geometry.point_data : + curves.geometry.curve_data; + + drw_attributes_add_request( + &attributes, name, type, CustomData_get_named_layer(&custom_data, type, name), domain); + + drw_attributes_merge(&final_cache.attr_used, &attributes, &cache.render_mutex); +} + +GPUTexture **DRW_curves_texture_for_evaluated_attribute(Curves *curves, + const char *name, + bool *r_is_point_domain) +{ + CurvesBatchCache &cache = curves_batch_cache_get(*curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + request_attribute(*curves, name); + + int request_i = -1; + for (const int i : IndexRange(final_cache.attr_used.num_requests)) { + if (STREQ(final_cache.attr_used.requests[i].attribute_name, name)) { + request_i = i; + break; + } + } + if (request_i == -1) { + *r_is_point_domain = false; + return nullptr; + } + switch (final_cache.attr_used.requests[request_i].domain) { + case ATTR_DOMAIN_POINT: + *r_is_point_domain = true; + return &final_cache.attributes_tex[request_i]; + case ATTR_DOMAIN_CURVE: + *r_is_point_domain = false; + return &cache.curves_cache.proc_attributes_tex[request_i]; + default: + BLI_assert_unreachable(); + return nullptr; + } +} + void DRW_curves_batch_cache_create_requested(Object *ob) { Curves *curves = static_cast<Curves *>(ob->data); diff --git a/source/blender/draw/intern/draw_cache_impl_displist.c b/source/blender/draw/intern/draw_cache_impl_displist.c deleted file mode 100644 index 96c088c3ee9..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_displist.c +++ /dev/null @@ -1,354 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief DispList API for render engines - * - * \note DispList may be removed soon! This is a utility for object types that use render. - */ - -#include "BLI_edgehash.h" -#include "BLI_listbase.h" -#include "BLI_math_vector.h" -#include "BLI_utildefines.h" - -#include "DNA_curve_types.h" -#include "DNA_scene_types.h" - -#include "BKE_displist.h" - -#include "GPU_batch.h" -#include "GPU_capabilities.h" - -#include "draw_cache_inline.h" - -#include "draw_cache_impl.h" /* own include */ - -static int dl_vert_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - case DL_INDEX4: - return dl->nr; - case DL_SURF: - return dl->parts * dl->nr; - } - return 0; -} - -static int dl_tri_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - return dl->parts; - case DL_INDEX4: - return dl->parts * 2; - case DL_SURF: - return dl->totindex * 2; - } - return 0; -} - -/* see: displist_vert_coords_alloc */ -static int curve_render_surface_vert_len_get(const ListBase *lb) -{ - int vert_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - vert_len += dl_vert_len(dl); - } - return vert_len; -} - -static int curve_render_surface_tri_len_get(const ListBase *lb) -{ - int tri_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - tri_len += dl_tri_len(dl); - } - return tri_len; -} - -typedef void(SetTriIndicesFn)(void *thunk, uint v1, uint v2, uint v3); - -static void displist_indexbufbuilder_set( - SetTriIndicesFn *set_tri_indices, - SetTriIndicesFn *set_quad_tri_indices, /* meh, find a better solution. 
*/ - void *thunk, - const DispList *dl, - const int ofs) -{ - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const int *idx = dl->index; - if (dl->type == DL_INDEX3) { - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 3) { - set_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - } - } - else if (dl->type == DL_SURF) { - const int i_end = dl->totindex; - for (int i = 0; i < i_end; i++, idx += 4) { - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[3] + ofs); - } - } - else { - BLI_assert(dl->type == DL_INDEX4); - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 4) { - if (idx[2] != idx[3]) { - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[3] + ofs); - } - else { - set_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - } - } - } - } -} - -void DRW_displist_vertbuf_create_pos_and_nor(ListBase *lb, GPUVertBuf *vbo, const Scene *scene) -{ - const bool do_hq_normals = (scene->r.perf_flag & SCE_PERF_HQ_NORMALS) != 0 || - GPU_use_hq_normals_workaround(); - - static GPUVertFormat format = {0}; - static GPUVertFormat format_hq = {0}; - static struct { - uint pos, nor; - uint pos_hq, nor_hq; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor = GPU_vertformat_attr_add( - &format, "nor", GPU_COMP_I10, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - /* initialize vertex format */ - attr_id.pos_hq = GPU_vertformat_attr_add(&format_hq, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor_hq = GPU_vertformat_attr_add( - &format_hq, "nor", GPU_COMP_I16, 3, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - - uint pos_id = do_hq_normals ? attr_id.pos_hq : attr_id.pos; - uint nor_id = do_hq_normals ? 
attr_id.nor_hq : attr_id.nor; - - GPU_vertbuf_init_with_format(vbo, do_hq_normals ? &format_hq : &format); - GPU_vertbuf_data_alloc(vbo, curve_render_surface_vert_len_get(lb)); - - BKE_displist_normals_add(lb); - - int vbo_len_used = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - const bool ndata_is_single = dl->type == DL_INDEX3; - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const float *fp_co = dl->verts; - const float *fp_no = dl->nors; - const int vbo_end = vbo_len_used + dl_vert_len(dl); - while (vbo_len_used < vbo_end) { - GPU_vertbuf_attr_set(vbo, pos_id, vbo_len_used, fp_co); - if (fp_no) { - GPUNormal vnor_pack; - GPU_normal_convert_v3(&vnor_pack, fp_no, do_hq_normals); - GPU_vertbuf_attr_set(vbo, nor_id, vbo_len_used, &vnor_pack); - if (ndata_is_single == false) { - fp_no += 3; - } - } - fp_co += 3; - vbo_len_used += 1; - } - } - } -} - -void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) -{ - static GPUVertFormat format = {0}; - static struct { - uint wd; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - if (!GPU_crappy_amd_driver()) { - /* Some AMD drivers strangely crash with a vbo with this format. 
*/ - attr_id.wd = GPU_vertformat_attr_add( - &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - else { - attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - } - } - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, vert_len); - - if (GPU_vertbuf_get_format(vbo)->stride == 1) { - memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); - } - else { - GPUVertBufRaw wd_step; - GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); - for (int i = 0; i < vert_len; i++) { - *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; - } - } -} - -void DRW_displist_vertbuf_create_wiredata(ListBase *lb, GPUVertBuf *vbo) -{ - const int vert_len = curve_render_surface_vert_len_get(lb); - DRW_vertbuf_create_wiredata(vbo, vert_len); -} - -void DRW_displist_indexbuf_create_triangles_in_order(ListBase *lb, GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tri_len, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - (SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - &elb, - dl, - ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -static void set_overlay_wires_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v2); - GPU_indexbuf_add_line_verts(eld, v2, v3); - GPU_indexbuf_add_line_verts(eld, v3, v1); -} - -static void set_overlay_wires_quad_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v3); - GPU_indexbuf_add_line_verts(eld, v3, v2); -} - -void DRW_displist_indexbuf_create_lines_in_order(ListBase *lb, 
GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES, tri_len * 3, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set( - set_overlay_wires_tri_indices, set_overlay_wires_quad_tri_indices, &elb, dl, ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -/* Edge detection/adjacency. */ -#define NO_EDGE INT_MAX -static void set_edge_adjacency_lines_indices( - EdgeHash *eh, GPUIndexBufBuilder *elb, bool *r_is_manifold, uint v1, uint v2, uint v3) -{ - bool inv_indices = (v2 > v3); - void **pval; - bool value_is_init = BLI_edgehash_ensure_p(eh, v2, v3, &pval); - int v_data = POINTER_AS_INT(*pval); - if (!value_is_init || v_data == NO_EDGE) { - /* Save the winding order inside the sign bit. Because the - * edgehash sort the keys and we need to compare winding later. */ - int value = (int)v1 + 1; /* Int 0 bm_looptricannot be signed */ - *pval = POINTER_FROM_INT((inv_indices) ? -value : value); - } - else { - /* HACK Tag as not used. Prevent overhead of BLI_edgehash_remove. */ - *pval = POINTER_FROM_INT(NO_EDGE); - bool inv_opposite = (v_data < 0); - uint v_opposite = (uint)abs(v_data) - 1; - - if (inv_opposite == inv_indices) { - /* Don't share edge if triangles have non matching winding. 
*/ - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v1); - GPU_indexbuf_add_line_adj_verts(elb, v_opposite, v2, v3, v_opposite); - *r_is_manifold = false; - } - else { - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v_opposite); - } - } -} - -static void set_edges_adjacency_lines_indices(void *thunk, uint v1, uint v2, uint v3) -{ - void **packed = (void **)thunk; - GPUIndexBufBuilder *elb = (GPUIndexBufBuilder *)packed[0]; - EdgeHash *eh = (EdgeHash *)packed[1]; - bool *r_is_manifold = (bool *)packed[2]; - - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v1, v2, v3); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v2, v3, v1); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v3, v1, v2); -} - -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - *r_is_manifold = true; - - /* Allocate max but only used indices are sent to GPU. */ - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES_ADJ, tri_len * 3, vert_len); - - EdgeHash *eh = BLI_edgehash_new_ex(__func__, tri_len * 3); - - /* pack values to pass to `set_edges_adjacency_lines_indices` function. */ - void *thunk[3] = {&elb, eh, r_is_manifold}; - int v_idx = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)set_edges_adjacency_lines_indices, - (SetTriIndicesFn *)set_edges_adjacency_lines_indices, - thunk, - dl, - v_idx); - v_idx += dl_vert_len(dl); - } - - /* Create edges for remaining non manifold edges. 
*/ - EdgeHashIterator *ehi; - for (ehi = BLI_edgehashIterator_new(eh); BLI_edgehashIterator_isDone(ehi) == false; - BLI_edgehashIterator_step(ehi)) { - uint v1, v2; - int v_data = POINTER_AS_INT(BLI_edgehashIterator_getValue(ehi)); - if (v_data == NO_EDGE) { - continue; - } - BLI_edgehashIterator_getKey(ehi, &v1, &v2); - uint v0 = (uint)abs(v_data) - 1; - if (v_data < 0) { /* inv_opposite */ - SWAP(uint, v1, v2); - } - GPU_indexbuf_add_line_adj_verts(&elb, v0, v1, v2, v0); - *r_is_manifold = false; - } - BLI_edgehashIterator_free(ehi); - BLI_edgehash_free(eh, NULL); - - GPU_indexbuf_build_in_place(&elb, ibo); -} -#undef NO_EDGE diff --git a/source/blender/draw/intern/draw_cache_impl_lattice.c b/source/blender/draw/intern/draw_cache_impl_lattice.c index cb621c6ceb9..0f12e78d60e 100644 --- a/source/blender/draw/intern/draw_cache_impl_lattice.c +++ b/source/blender/draw/intern/draw_cache_impl_lattice.c @@ -27,12 +27,6 @@ #define SELECT 1 -/** - * TODO - * - 'DispList' is currently not used - * (we could avoid using since it will be removed) - */ - static void lattice_batch_cache_clear(Lattice *lt); /* ---------------------------------------------------------------------- */ diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.cc b/source/blender/draw/intern/draw_cache_impl_mesh.cc index 7c02ee2c033..c22382b3e09 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.cc +++ b/source/blender/draw/intern/draw_cache_impl_mesh.cc @@ -21,6 +21,7 @@ #include "BLI_math_vector.h" #include "BLI_span.hh" #include "BLI_string.h" +#include "BLI_string_ref.hh" #include "BLI_task.h" #include "BLI_utildefines.h" @@ -67,6 +68,7 @@ using blender::IndexRange; using blender::Map; using blender::Span; +using blender::StringRefNull; /* ---------------------------------------------------------------------- */ /** \name Dependencies between buffer and batch @@ -115,8 +117,6 @@ static constexpr DRWBatchFlag batches_that_use_buffer(const int buffer_index) MBC_SURFACE_PER_MAT; case 
BUFFER_INDEX(vbo.tan): return MBC_SURFACE_PER_MAT; - case BUFFER_INDEX(vbo.vcol): - return MBC_SURFACE | MBC_SURFACE_PER_MAT; case BUFFER_INDEX(vbo.sculpt_data): return MBC_SCULPT_OVERLAYS; case BUFFER_INDEX(vbo.orco): @@ -236,87 +236,11 @@ BLI_INLINE void mesh_cd_layers_type_clear(DRW_MeshCDMask *a) *((uint32_t *)a) = 0; } -BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) -{ - if (me->edit_mesh != nullptr) { - Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); - if (editmesh_eval_final != nullptr) { - return editmesh_eval_final; - } - } - - return me; -} - static void mesh_cd_calc_edit_uv_layer(const Mesh *UNUSED(me), DRW_MeshCDMask *cd_used) { cd_used->edit_uv = 1; } -BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->ldata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->ldata; - break; - } - - BLI_assert(0); - return &me->ldata; -} - -BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->pdata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->pdata; - break; - } - - BLI_assert(0); - return &me->pdata; -} - -BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->edata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->edata; - break; - } - - BLI_assert(0); - return &me->edata; -} - -BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->vdata; - break; - 
case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->vdata; - break; - } - - BLI_assert(0); - return &me->vdata; -} - static void mesh_cd_calc_active_uv_layer(const Object *object, const Mesh *me, DRW_MeshCDMask *cd_used) @@ -341,75 +265,6 @@ static void mesh_cd_calc_active_mask_uv_layer(const Object *object, } } -static void mesh_cd_calc_active_mloopcol_layer(const Object *object, - const Mesh *me, - DRW_MeshCDMask *cd_used) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); - Mesh me_query = blender::dna::shallow_zero_initialize(); - - const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); - const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *layer = BKE_id_attributes_active_color_get(&me_query.id); - int layer_i = BKE_id_attribute_to_index( - &me_query.id, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - if (layer_i != -1) { - cd_used->vcol |= (1UL << (uint)layer_i); - } -} - -static uint mesh_cd_calc_gpu_layers_vcol_used(const Mesh *me_query, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const char name[]) -{ - const CustomDataLayer *layer = nullptr; - eAttrDomain domain; - - if (name[0]) { - int layer_i = 0; - - domain = ATTR_DOMAIN_POINT; - layer_i = CustomData_get_named_layer_index(cd_vdata, CD_PROP_COLOR, name); - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_vdata, CD_PROP_BYTE_COLOR, name) : - layer_i; - - if (layer_i == -1) { - domain = ATTR_DOMAIN_CORNER; - layer_i = layer_i == -1 ? CustomData_get_named_layer_index(cd_ldata, CD_PROP_COLOR, name) : - layer_i; - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_ldata, CD_PROP_BYTE_COLOR, name) : - layer_i; - } - - /* NOTE: this is not the same as the layer_i below. */ - if (layer_i != -1) { - layer = (domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata)->layers + layer_i; - } - } - else { - layer = BKE_id_attributes_render_color_get(&me_query->id); - } - - if (!layer) { - return -1; - } - - /* NOTE: this is the logical index into the color attribute list, - * not the customdata index. */ - int vcol_i = BKE_id_attribute_to_index( - (ID *)me_query, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - return vcol_i; -} - static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, const Mesh *me, struct GPUMaterial **gpumat_array, @@ -433,56 +288,33 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, DRW_MeshCDMask cd_used; mesh_cd_layers_type_clear(&cd_used); + const CustomDataLayer *default_color = BKE_id_attributes_render_color_get(&me_query.id); + const StringRefNull default_color_name = default_color ? default_color->name : ""; + for (int i = 0; i < gpumat_array_len; i++) { GPUMaterial *gpumat = gpumat_array[i]; - if (gpumat) { - ListBase gpu_attrs = GPU_material_attributes(gpumat); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); - int layer = -1; - std::optional<eAttrDomain> domain; - - if (type == CD_AUTO_FROM_NAME) { - /* We need to deduce what exact layer is used. - * - * We do it based on the specified name. 
- */ - if (name[0] != '\0') { - layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); - type = CD_MTFACE; - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } + if (gpumat == nullptr) { + continue; + } + ListBase gpu_attrs = GPU_material_attributes(gpumat); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); + int layer = -1; + std::optional<eAttrDomain> domain; + + if (gpu_attr->is_default_color) { + name = default_color_name.c_str(); + } - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } + if (type == CD_AUTO_FROM_NAME) { + /* We need to deduce what exact layer is used. + * + * We do it based on the specified name. + */ + if (name[0] != '\0') { + layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); + type = CD_MTFACE; #if 0 /* Tangents are always from UV's - this will never happen. */ if (layer == -1) { @@ -490,108 +322,87 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, type = CD_TANGENT; } #endif - if (layer == -1) { - /* Try to match a generic attribute, we use the first attribute domain with a - * matching name. 
*/ - if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { - domain = ATTR_DOMAIN_CORNER; - } - else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_FACE; - } - else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { - domain = ATTR_DOMAIN_EDGE; - } - else { - layer = -1; - } + if (layer == -1) { + /* Try to match a generic attribute, we use the first attribute domain with a + * matching name. */ + if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_POINT; } - - if (layer == -1) { - continue; + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { + domain = ATTR_DOMAIN_CORNER; } - } - else { - /* Fall back to the UV layer, which matches old behavior. */ - type = CD_MTFACE; - } - } - - switch (type) { - case CD_MTFACE: { - if (layer == -1) { - layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_FACE; } - if (layer != -1) { - cd_used.uv |= (1 << layer); - } - break; - } - case CD_TANGENT: { - if (layer == -1) { - layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - - /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ - if (layer == -1 && name[0] != '\0') { - layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - } - } - if (layer != -1) { - cd_used.tan |= (1 << layer); + else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { + domain = ATTR_DOMAIN_EDGE; } else { - /* no UV layers at all => requesting orco */ - cd_used.tan_orco = 1; - cd_used.orco = 1; + layer = -1; } - break; } - case CD_ORCO: { - cd_used.orco = 1; - break; + if (layer == -1) { + continue; } + } + else { + /* Fall back to the UV layer, which matches old behavior. */ + type = CD_MTFACE; + } + } - /* NOTE: attr->type will always be CD_PROP_COLOR even for - * CD_PROP_BYTE_COLOR layers, see node_shader_gpu_vertex_color in - * node_shader_vertex_color.cc. - */ - case CD_MCOL: - case CD_PROP_BYTE_COLOR: - case CD_PROP_COLOR: { - /* First check Color attributes, when not found check mesh attributes. Geometry nodes - * can generate those layers. */ - int vcol_bit = mesh_cd_calc_gpu_layers_vcol_used(&me_query, cd_vdata, cd_ldata, name); - - if (vcol_bit != -1) { - cd_used.vcol |= 1UL << (uint)vcol_bit; - break; - } - - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); - } - break; + switch (type) { + case CD_MTFACE: { + if (layer == -1) { + layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + } + if (layer != -1) { + cd_used.uv |= (1 << layer); } - case CD_PROP_FLOAT3: - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: { - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); + break; + } + case CD_TANGENT: { + if (layer == -1) { + layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + + /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ + if (layer == -1 && name[0] != '\0') { + layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); } - break; } - default: - break; + if (layer != -1) { + cd_used.tan |= (1 << layer); + } + else { + /* no UV layers at all => requesting orco */ + cd_used.tan_orco = 1; + cd_used.orco = 1; + } + break; } + + case CD_ORCO: { + cd_used.orco = 1; + break; + } + case CD_PROP_BYTE_COLOR: + case CD_PROP_COLOR: + case CD_PROP_FLOAT3: + case CD_PROP_BOOL: + case CD_PROP_INT8: + case CD_PROP_INT32: + case CD_PROP_FLOAT: + case CD_PROP_FLOAT2: { + if (layer != -1 && domain.has_value()) { + drw_attributes_add_request(attributes, name, type, layer, *domain); + } + break; + } + default: + break; } } } @@ -745,8 +556,7 @@ static bool mesh_batch_cache_valid(Object *object, Mesh *me) } if (object->sculpt && object->sculpt->pbvh) { - if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh) || - BKE_pbvh_draw_cache_invalid(object->sculpt->pbvh)) { + if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh)) { return false; } @@ -863,10 +673,9 @@ static void mesh_batch_cache_discard_shaded_tri(MeshBatchCache *cache) FOREACH_MESH_BUFFER_CACHE (cache, mbc) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.uv); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.tan); - GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.orco); } - DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.vcol, vbo.orco); + DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.orco); mesh_batch_cache_discard_batch(cache, batch_map); mesh_cd_layers_type_clear(&cache->cd_used); } @@ -1070,42 +879,35 @@ static void texpaint_request_active_uv(MeshBatchCache *cache, Object *object, Me mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); } -static void 
texpaint_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) +static void request_active_and_default_color_attributes(const Object &object, + const Mesh &mesh, + DRW_Attributes &attributes) { - DRW_MeshCDMask cd_needed; - mesh_cd_layers_type_clear(&cd_needed); - mesh_cd_calc_active_mloopcol_layer(object, me, &cd_needed); - - BLI_assert(cd_needed.vcol != 0 && - "No MLOOPCOL layer available in vertpaint, but batches requested anyway!"); - - mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); -} - -static void sculpt_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); + const Mesh *me_final = editmesh_final_or_this(&object, &mesh); const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); + /* Necessary because which attributes are active/default is stored in #CustomData. */ Mesh me_query = blender::dna::shallow_zero_initialize(); BKE_id_attribute_copy_domains_temp( ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id); - - int active_i = BKE_id_attribute_to_index( - &me_query.id, active, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - int render_i = BKE_id_attribute_to_index( - &me_query.id, render, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); + auto request_color_attribute = [&](const char *name) { + int layer_index; + eCustomDataType type; + if (drw_custom_data_match_attribute(cd_vdata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_POINT); + } + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_CORNER); + } + }; - if (active_i >= 0) { - 
cache->cd_needed.vcol |= 1UL << (uint)active_i; + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + request_color_attribute(active->name); } - - if (render_i >= 0) { - cache->cd_needed.vcol |= 1UL << (uint)render_i; + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + request_color_attribute(render->name); } } @@ -1214,7 +1016,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_texpaint_single(Object *object, Mesh GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - texpaint_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1222,7 +1030,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) GPUBatch *DRW_mesh_batch_cache_get_surface_sculpt(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - sculpt_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1621,9 +1435,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.sculpt_overlays != cache->cd_needed.sculpt_overlays) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.sculpt_data); } - if ((cache->cd_used.vcol & cache->cd_needed.vcol) != cache->cd_needed.vcol) { - 
GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); - } if (!drw_attributes_overlap(&cache->attr_used, &cache->attr_needed)) { for (int i = 0; i < GPU_MAX_ATTR; i++) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.attr[i]); @@ -1697,12 +1508,13 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, cache->batch_ready |= batch_requested; bool do_cage = false, do_uvcage = false; - if (is_editmode) { + if (is_editmode && is_mode_active) { Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(ob); Mesh *editmesh_eval_cage = BKE_object_get_editmesh_eval_cage(ob); do_cage = editmesh_eval_final != editmesh_eval_cage; - do_uvcage = !editmesh_eval_final->runtime.is_original; + do_uvcage = !(editmesh_eval_final->runtime.is_original_bmesh && + editmesh_eval_final->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH); } const bool do_subdivision = BKE_subsurf_modifier_has_gpu_subdiv(me); @@ -1710,15 +1522,26 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, MeshBufferList *mbuflist = &cache->final.buff; /* Initialize batches and request VBO's & IBO's. 
*/ - assert_deps_valid( - MBC_SURFACE, - {BUFFER_INDEX(ibo.tris), BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), - BUFFER_INDEX(vbo.uv), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.attr[0]), - BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), - BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), - BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), - BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), - BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + assert_deps_valid(MBC_SURFACE, + {BUFFER_INDEX(ibo.tris), + BUFFER_INDEX(vbo.lnor), + BUFFER_INDEX(vbo.pos_nor), + BUFFER_INDEX(vbo.uv), + BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), + BUFFER_INDEX(vbo.attr[2]), + BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), + BUFFER_INDEX(vbo.attr[5]), + BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), + BUFFER_INDEX(vbo.attr[8]), + BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), + BUFFER_INDEX(vbo.attr[11]), + BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), + BUFFER_INDEX(vbo.attr[14])}); if (DRW_batch_requested(cache->batch.surface, GPU_PRIM_TRIS)) { DRW_ibo_request(cache->batch.surface, &mbuflist->ibo.tris); /* Order matters. First ones override latest VBO's attributes. 
*/ @@ -1727,9 +1550,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.uv != 0) { DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.uv); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.vcol); - } drw_add_attributes_vbo(cache->batch.surface, mbuflist, &cache->attr_used); } assert_deps_valid(MBC_ALL_VERTS, {BUFFER_INDEX(vbo.pos_nor)}); @@ -1807,12 +1627,12 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_deps_valid( MBC_SURFACE_PER_MAT, {BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), BUFFER_INDEX(vbo.uv), - BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.orco), - BUFFER_INDEX(vbo.attr[0]), BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), - BUFFER_INDEX(vbo.attr[3]), BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), - BUFFER_INDEX(vbo.attr[6]), BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), - BUFFER_INDEX(vbo.attr[9]), BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), - BUFFER_INDEX(vbo.attr[12]), BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.orco), BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); assert_deps_valid(MBC_SURFACE_PER_MAT, {TRIS_PER_MAT_INDEX}); for (int i = 0; i < cache->mat_len; i++) { if (DRW_batch_requested(cache->surface_per_mat[i], GPU_PRIM_TRIS)) { @@ -1826,9 +1646,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if ((cache->cd_used.tan != 0) || (cache->cd_used.tan_orco != 0)) { DRW_vbo_request(cache->surface_per_mat[i], 
&mbuflist->vbo.tan); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.vcol); - } if (cache->cd_used.orco != 0) { DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.orco); } @@ -1994,7 +1811,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_final_deps_valid(BUFFER_INDEX(vbo.lnor)); assert_final_deps_valid(BUFFER_INDEX(vbo.pos_nor)); assert_final_deps_valid(BUFFER_INDEX(vbo.uv)); - assert_final_deps_valid(BUFFER_INDEX(vbo.vcol)); assert_final_deps_valid(BUFFER_INDEX(vbo.sculpt_data)); assert_final_deps_valid(BUFFER_INDEX(vbo.weights)); assert_final_deps_valid(BUFFER_INDEX(vbo.edge_fac)); @@ -2078,6 +1894,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, ob->obmat, true, false, + do_cage, ts, use_hide); } diff --git a/source/blender/draw/intern/draw_cache_impl_metaball.c b/source/blender/draw/intern/draw_cache_impl_metaball.c deleted file mode 100644 index 1408dc91069..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_metaball.c +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief MetaBall API for render engines - */ - -#include "MEM_guardedalloc.h" - -#include "BLI_math_base.h" -#include "BLI_utildefines.h" - -#include "DNA_meta_types.h" -#include "DNA_object_types.h" - -#include "BKE_curve.h" -#include "BKE_mball.h" - -#include "GPU_batch.h" - -#include "DRW_render.h" -#include "draw_cache_impl.h" /* own include */ - -static void metaball_batch_cache_clear(MetaBall *mb); - -/* -------------------------------------------------------------------- */ -/** \name MetaBall GPUBatch Cache - * \{ */ - -typedef struct MetaBallBatchCache { - GPUBatch *batch; - GPUBatch **shaded_triangles; - - int mat_len; - - /* Shared */ - GPUVertBuf *pos_nor_in_order; - - /* Wireframe */ - struct { - GPUBatch *batch; - } face_wire; - - /* Edge detection */ - GPUBatch *edge_detection; - GPUIndexBuf *edges_adj_lines; - - /* settings to determine if cache is invalid */ - bool is_dirty; - - /* Valid only if edge_detection is up to date. */ - bool is_manifold; -} MetaBallBatchCache; - -/* GPUBatch cache management. 
*/ - -static bool metaball_batch_cache_valid(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (cache == NULL) { - return false; - } - - return cache->is_dirty == false; -} - -static void metaball_batch_cache_init(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (!cache) { - cache = mb->batch_cache = MEM_mallocN(sizeof(*cache), __func__); - } - cache->batch = NULL; - cache->mat_len = 0; - cache->shaded_triangles = NULL; - cache->is_dirty = false; - cache->pos_nor_in_order = NULL; - cache->face_wire.batch = NULL; - cache->edge_detection = NULL; - cache->edges_adj_lines = NULL; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_validate(MetaBall *mb) -{ - if (!metaball_batch_cache_valid(mb)) { - metaball_batch_cache_clear(mb); - metaball_batch_cache_init(mb); - } -} - -static MetaBallBatchCache *metaball_batch_cache_get(MetaBall *mb) -{ - return mb->batch_cache; -} - -void DRW_mball_batch_cache_dirty_tag(MetaBall *mb, int mode) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (cache == NULL) { - return; - } - switch (mode) { - case BKE_MBALL_BATCH_DIRTY_ALL: - cache->is_dirty = true; - break; - default: - BLI_assert(0); - } -} - -static void metaball_batch_cache_clear(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (!cache) { - return; - } - - GPU_BATCH_DISCARD_SAFE(cache->face_wire.batch); - GPU_BATCH_DISCARD_SAFE(cache->batch); - GPU_BATCH_DISCARD_SAFE(cache->edge_detection); - GPU_VERTBUF_DISCARD_SAFE(cache->pos_nor_in_order); - GPU_INDEXBUF_DISCARD_SAFE(cache->edges_adj_lines); - /* NOTE: shaded_triangles[0] is already freed by `cache->batch`. 
*/ - MEM_SAFE_FREE(cache->shaded_triangles); - cache->mat_len = 0; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_free(MetaBall *mb) -{ - metaball_batch_cache_clear(mb); - MEM_SAFE_FREE(mb->batch_cache); -} - -static GPUVertBuf *mball_batch_cache_get_pos_and_normals(Object *ob, - MetaBallBatchCache *cache, - const struct Scene *scene) -{ - if (cache->pos_nor_in_order == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->pos_nor_in_order = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_pos_and_nor(lb, cache->pos_nor_in_order, scene); - } - return cache->pos_nor_in_order; -} - -static GPUIndexBuf *mball_batch_cache_get_edges_adj_lines(Object *ob, MetaBallBatchCache *cache) -{ - if (cache->edges_adj_lines == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->edges_adj_lines = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_edges_adjacency_lines( - lb, cache->edges_adj_lines, &cache->is_manifold); - } - return cache->edges_adj_lines; -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Public Object/MetaBall API - * \{ */ - -GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_triangles_in_order(lb, ibo); - cache->batch = GPU_batch_create_ex(GPU_PRIM_TRIS, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - } - - return cache->batch; -} - -GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(Object *ob, - MetaBall *mb, - struct GPUMaterial **UNUSED(gpumat_array), - uint gpumat_array_len) -{ - if 
(!BKE_mball_is_basis(ob)) { - return NULL; - } - - BLI_assert(gpumat_array_len == DRW_metaball_material_count_get(mb)); - - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - if (cache->shaded_triangles == NULL) { - cache->mat_len = gpumat_array_len; - cache->shaded_triangles = MEM_callocN(sizeof(*cache->shaded_triangles) * cache->mat_len, - __func__); - cache->shaded_triangles[0] = DRW_metaball_batch_cache_get_triangles_with_normals(ob); - for (int i = 1; i < cache->mat_len; i++) { - cache->shaded_triangles[i] = NULL; - } - } - return cache->shaded_triangles; -} - -GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->face_wire.batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - - GPUVertBuf *vbo_wiredata = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_wiredata(lb, vbo_wiredata); - - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_lines_in_order(lb, ibo); - - cache->face_wire.batch = GPU_batch_create_ex( - GPU_PRIM_LINES, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - - GPU_batch_vertbuf_add_ex(cache->face_wire.batch, vbo_wiredata, true); - } - - return cache->face_wire.batch; -} - -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->edge_detection == NULL) { - cache->edge_detection = GPU_batch_create( - GPU_PRIM_LINES_ADJ, - mball_batch_cache_get_pos_and_normals(ob, cache, 
scene), - mball_batch_cache_get_edges_adj_lines(ob, cache)); - } - - if (r_is_manifold) { - *r_is_manifold = cache->is_manifold; - } - - return cache->edge_detection; -} - -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - return mball_batch_cache_get_pos_and_normals(ob, cache, scene); -} - -int DRW_metaball_material_count_get(MetaBall *mb) -{ - return max_ii(1, mb->totcol); -} - -/** \} */ diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c index c1d609bf648..9c1784b1de2 100644 --- a/source/blender/draw/intern/draw_cache_impl_particles.c +++ b/source/blender/draw/intern/draw_cache_impl_particles.c @@ -11,6 +11,7 @@ #include "MEM_guardedalloc.h" +#include "BLI_alloca.h" #include "BLI_ghash.h" #include "BLI_math_vector.h" #include "BLI_string.h" @@ -24,12 +25,15 @@ #include "BKE_customdata.h" #include "BKE_mesh.h" +#include "BKE_mesh_legacy_convert.h" #include "BKE_particle.h" #include "BKE_pointcache.h" #include "ED_particle.h" #include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "DEG_depsgraph_query.h" @@ -181,10 +185,11 @@ static void particle_batch_cache_clear_hair(ParticleHairCache *hair_cache) GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_uv_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->uv_tex[i]); } - for (int i = 0; i < MAX_MCOL; i++) { + for (int i = 0; i < hair_cache->num_col_layers; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_col_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->col_tex[i]); } + for (int i = 0; i < MAX_HAIR_SUBDIV; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->final[i].proc_buf); DRW_TEXTURE_FREE_SAFE(hair_cache->final[i].proc_tex); @@ -217,9 
+222,24 @@ static void particle_batch_cache_clear(ParticleSystem *psys) GPU_VERTBUF_DISCARD_SAFE(cache->edit_tip_pos); } +static void particle_batch_cache_free_hair(ParticleHairCache *hair) +{ + MEM_SAFE_FREE(hair->proc_col_buf); + MEM_SAFE_FREE(hair->col_tex); + MEM_SAFE_FREE(hair->col_layer_names); +} + void DRW_particle_batch_cache_free(ParticleSystem *psys) { particle_batch_cache_clear(psys); + + ParticleBatchCache *cache = psys->batch_cache; + + if (cache) { + particle_batch_cache_free_hair(&cache->hair); + particle_batch_cache_free_hair(&cache->edit_hair); + } + MEM_SAFE_FREE(psys->batch_cache); } @@ -295,7 +315,8 @@ static void particle_calculate_parent_uvs(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -324,7 +345,8 @@ static void particle_calculate_parent_mcol(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. 
*/ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -350,7 +372,8 @@ static void particle_interpolate_children_uvs(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -374,7 +397,8 @@ static void particle_interpolate_children_mcol(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. */ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -790,7 +814,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format); + /* Transform feedback buffer only needs to be resident in device memory. */ + GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC; + cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -832,10 +859,10 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit GPUVertBufRaw data_step, seg_step; GPUVertBufRaw uv_step[MAX_MTFACE]; - GPUVertBufRaw col_step[MAX_MCOL]; + GPUVertBufRaw *col_step = BLI_array_alloca(col_step, cache->num_col_layers); const MTFace *mtfaces[MAX_MTFACE] = {NULL}; - const MCol *mcols[MAX_MCOL] = {NULL}; + const MCol **mcols = BLI_array_alloca(mcols, cache->num_col_layers); float(**parent_uvs)[2] = NULL; MCol **parent_mcol = NULL; @@ -853,20 +880,22 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit &format_col, "col", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names)); - memset(cache->col_layer_names, 0, sizeof(cache->col_layer_names)); /* Strand Data */ - cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step); - cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step); /* UV layers */ for (int i = 0; i < cache->num_uv_layers; i++) { - cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv); + cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]); @@ -884,9 +913,20 @@ static void 
particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit BLI_strncpy(cache->uv_layer_names[i][n++], "a", MAX_LAYER_NAME_LEN); } } + + MEM_SAFE_FREE(cache->proc_col_buf); + MEM_SAFE_FREE(cache->col_tex); + MEM_SAFE_FREE(cache->col_layer_names); + + cache->proc_col_buf = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "proc_col_buf"); + cache->col_tex = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "col_tex"); + cache->col_layer_names = MEM_calloc_arrayN( + cache->num_col_layers, sizeof(*cache->col_layer_names), "col_layer_names"); + /* Vertex colors */ for (int i = 0; i < cache->num_col_layers; i++) { - cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col); + cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]); @@ -1032,8 +1072,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit, static GPUVertFormat format = {0}; GPU_vertformat_clear(&format); - /* initialize vertex format */ - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum + * stride requirement. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 1); @@ -1074,7 +1115,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint pos_id = GPU_vertformat_attr_add( &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format); + cache->proc_point_buf = GPU_vertbuf_create_with_format_ex( + &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len); GPUVertBufRaw pos_step; @@ -1084,7 +1126,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint length_id = GPU_vertformat_attr_add( &length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache->proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len); GPUVertBufRaw length_step; diff --git a/source/blender/draw/intern/draw_cache_impl_pointcloud.c b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc index d715899e291..a43b23c8969 100644 --- a/source/blender/draw/intern/draw_cache_impl_pointcloud.c +++ b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc @@ -13,23 +13,23 @@ #include "BLI_math_base.h" #include "BLI_math_vector.h" +#include "BLI_task.hh" #include "BLI_utildefines.h" #include "DNA_object_types.h" #include "DNA_pointcloud_types.h" +#include "BKE_attribute.hh" #include "BKE_pointcloud.h" #include "GPU_batch.h" #include "draw_cache_impl.h" /* own include */ -static void pointcloud_batch_cache_clear(PointCloud *pointcloud); - /* ---------------------------------------------------------------------- */ /* PointCloud GPUBatch Cache */ -typedef struct 
PointCloudBatchCache { +struct PointCloudBatchCache { GPUVertBuf *pos; /* Position and radius. */ GPUVertBuf *geom; /* Instanced geometry for each point in the cloud (small sphere). */ GPUIndexBuf *geom_indices; @@ -42,58 +42,51 @@ typedef struct PointCloudBatchCache { bool is_dirty; int mat_len; -} PointCloudBatchCache; +}; /* GPUBatch cache management. */ -static bool pointcloud_batch_cache_valid(PointCloud *pointcloud) +static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud &pointcloud) +{ + return static_cast<PointCloudBatchCache *>(pointcloud.batch_cache); +} + +static bool pointcloud_batch_cache_valid(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache == NULL) { + if (cache == nullptr) { return false; } - if (cache->mat_len != DRW_pointcloud_material_count_get(pointcloud)) { + if (cache->mat_len != DRW_pointcloud_material_count_get(&pointcloud)) { return false; } return cache->is_dirty == false; } -static void pointcloud_batch_cache_init(PointCloud *pointcloud) +static void pointcloud_batch_cache_init(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { - cache = pointcloud->batch_cache = MEM_callocN(sizeof(*cache), __func__); + cache = MEM_cnew<PointCloudBatchCache>(__func__); + pointcloud.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } - cache->mat_len = DRW_pointcloud_material_count_get(pointcloud); - cache->surface_per_mat = MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, - "pointcloud suface_per_mat"); + cache->mat_len = DRW_pointcloud_material_count_get(&pointcloud); + cache->surface_per_mat = static_cast<GPUBatch **>( + MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, __func__)); cache->is_dirty = false; } -void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) -{ - if 
(!pointcloud_batch_cache_valid(pointcloud)) { - pointcloud_batch_cache_clear(pointcloud); - pointcloud_batch_cache_init(pointcloud); - } -} - -static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud *pointcloud) -{ - return pointcloud->batch_cache; -} - void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) { - PointCloudBatchCache *cache = pointcloud->batch_cache; - if (cache == NULL) { + PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud); + if (cache == nullptr) { return; } switch (mode) { @@ -105,9 +98,9 @@ void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) } } -static void pointcloud_batch_cache_clear(PointCloud *pointcloud) +static void pointcloud_batch_cache_clear(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { return; } @@ -126,50 +119,65 @@ static void pointcloud_batch_cache_clear(PointCloud *pointcloud) MEM_SAFE_FREE(cache->surface_per_mat); } +void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) +{ + if (!pointcloud_batch_cache_valid(*pointcloud)) { + pointcloud_batch_cache_clear(*pointcloud); + pointcloud_batch_cache_init(*pointcloud); + } +} + void DRW_pointcloud_batch_cache_free(PointCloud *pointcloud) { - pointcloud_batch_cache_clear(pointcloud); + pointcloud_batch_cache_clear(*pointcloud); MEM_SAFE_FREE(pointcloud->batch_cache); } -static void pointcloud_batch_cache_ensure_pos(Object *ob, PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_pos(const PointCloud &pointcloud, + PointCloudBatchCache &cache) { - if (cache->pos != NULL) { + using namespace blender; + if (cache.pos != nullptr) { return; } - PointCloud *pointcloud = ob->data; - const bool has_radius = pointcloud->radius != NULL; - - static GPUVertFormat format = {0}; - static GPUVertFormat format_no_radius = {0}; - static uint pos; - if (format.attr_len == 0) { - /* 
initialize vertex format */ - /* From the opengl wiki: - * Note that size does not have to exactly match the size used by the vertex shader. If the - * vertex shader has fewer components than the attribute provides, then the extras are ignored. - * If the vertex shader has more components than the array provides, the extras are given - * values from the vector (0, 0, 0, 1) for the missing XYZW components. - */ - pos = GPU_vertformat_attr_add(&format_no_radius, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - } - - cache->pos = GPU_vertbuf_create_with_format(has_radius ? &format : &format_no_radius); - GPU_vertbuf_data_alloc(cache->pos, pointcloud->totpoint); - - if (has_radius) { - float(*vbo_data)[4] = (float(*)[4])GPU_vertbuf_get_data(cache->pos); - for (int i = 0; i < pointcloud->totpoint; i++) { - copy_v3_v3(vbo_data[i], pointcloud->co[i]); - /* TODO(fclem): remove multiplication here. - * Here only for keeping the size correct for now. */ - vbo_data[i][3] = pointcloud->radius[i] * 100.0f; + const bke::AttributeAccessor attributes = pointcloud.attributes(); + const VArraySpan<float3> positions = attributes.lookup<float3>("position", ATTR_DOMAIN_POINT); + const VArray<float> radii = attributes.lookup<float>("radius", ATTR_DOMAIN_POINT); + /* From the opengl wiki: + * Note that size does not have to exactly match the size used by the vertex shader. If the + * vertex shader has fewer components than the attribute provides, then the extras are ignored. + * If the vertex shader has more components than the array provides, the extras are given + * values from the vector (0, 0, 0, 1) for the missing XYZW components. 
*/ + if (radii) { + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + const VArraySpan<float> radii_span(radii); + MutableSpan<float4> vbo_data{static_cast<float4 *>(GPU_vertbuf_get_data(cache.pos)), + pointcloud.totpoint}; + threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) { + for (const int i : range) { + vbo_data[i].x = positions[i].x; + vbo_data[i].y = positions[i].y; + vbo_data[i].z = positions[i].z; + /* TODO(fclem): remove multiplication. Here only for keeping the size correct for now. */ + vbo_data[i].w = radii_span[i] * 100.0f; + } + }); } else { - GPU_vertbuf_attr_fill(cache->pos, pos, pointcloud->co); + static GPUVertFormat format = {0}; + static uint pos; + if (format.attr_len == 0) { + pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + GPU_vertbuf_attr_fill(cache.pos, pos, positions.data()); } } @@ -188,24 +196,23 @@ static const uint half_octahedron_tris[4][3] = { {0, 4, 1}, }; -static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_geom(PointCloudBatchCache &cache) { - if (cache->geom != NULL) { + if (cache.geom != nullptr) { return; } static GPUVertFormat format = {0}; static uint pos; if (format.attr_len == 0) { - /* initialize vertex format */ pos = GPU_vertformat_attr_add(&format, "pos_inst", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "nor"); } - cache->geom = GPU_vertbuf_create_with_format(&format); - GPU_vertbuf_data_alloc(cache->geom, ARRAY_SIZE(half_octahedron_normals)); + cache.geom = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.geom, 
ARRAY_SIZE(half_octahedron_normals)); - GPU_vertbuf_attr_fill(cache->geom, pos, half_octahedron_normals); + GPU_vertbuf_attr_fill(cache.geom, pos, half_octahedron_normals); GPUIndexBufBuilder builder; GPU_indexbuf_init(&builder, @@ -217,17 +224,17 @@ static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBat GPU_indexbuf_add_tri_verts(&builder, UNPACK3(half_octahedron_tris[i])); } - cache->geom_indices = GPU_indexbuf_build(&builder); + cache.geom_indices = GPU_indexbuf_build(&builder); } GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->dots == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, NULL); + if (cache->dots == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, nullptr); } return cache->dots; @@ -235,12 +242,12 @@ GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) GPUBatch *DRW_pointcloud_batch_cache_get_surface(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->surface == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface, cache->pos, false); @@ -253,14 +260,14 @@ GPUBatch **DRW_cache_pointcloud_surface_shaded_get(Object *ob, struct GPUMaterial **UNUSED(gpumat_array), uint gpumat_array_len) { - PointCloud *pointcloud = ob->data; + 
PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); BLI_assert(cache->mat_len == gpumat_array_len); UNUSED_VARS(gpumat_array_len); - if (cache->surface_per_mat[0] == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface_per_mat[0] == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface_per_mat[0] = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface_per_mat[0], cache->pos, false); diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc index b37a420b555..ab935809f96 100644 --- a/source/blender/draw/intern/draw_cache_impl_subdivision.cc +++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc @@ -7,8 +7,10 @@ #include "DNA_object_types.h" #include "DNA_scene_types.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_mesh.h" +#include "BKE_mesh_mapping.h" #include "BKE_modifier.h" #include "BKE_object.h" #include "BKE_scene.h" @@ -19,8 +21,8 @@ #include "BKE_subdiv_modifier.h" #include "BLI_linklist.h" - #include "BLI_string.h" +#include "BLI_virtual_array.hh" #include "PIL_time.h" @@ -44,6 +46,8 @@ #include "draw_cache_inline.h" #include "mesh_extractors/extract_mesh.hh" +using blender::Span; + extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[]; @@ -668,20 +672,23 @@ static void draw_subdiv_cache_extra_coarse_face_data_bm(BMesh *bm, } } -static void draw_subdiv_cache_extra_coarse_face_data_mesh(Mesh *mesh, uint32_t *flags_data) +static void draw_subdiv_cache_extra_coarse_face_data_mesh(const MeshRenderData *mr, + Mesh *mesh, + uint32_t *flags_data) { - 
for (int i = 0; i < mesh->totpoly; i++) { + const Span<MPoly> polys = mesh->polys(); + for (const int i : polys.index_range()) { uint32_t flag = 0; - if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) { + if ((polys[i].flag & ME_SMOOTH) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH; } - if ((mesh->mpoly[i].flag & ME_FACE_SEL) != 0) { + if ((polys[i].flag & ME_FACE_SEL) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SELECT; } - if ((mesh->mpoly[i].flag & ME_HIDE) != 0) { + if (mr->hide_poly && mr->hide_poly[i]) { flag |= SUBDIV_COARSE_FACE_FLAG_HIDDEN; } - flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + flags_data[i] = (uint)(polys[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); } } @@ -691,7 +698,7 @@ static void draw_subdiv_cache_extra_coarse_face_data_mapped(Mesh *mesh, uint32_t *flags_data) { if (bm == nullptr) { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); return; } @@ -722,11 +729,11 @@ static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cach if (mr->extract_type == MR_EXTRACT_BMESH) { draw_subdiv_cache_extra_coarse_face_data_bm(cache->bm, mr->efa_act, flags_data); } - else if (mr->extract_type == MR_EXTRACT_MAPPED) { + else if (mr->p_origindex != nullptr) { draw_subdiv_cache_extra_coarse_face_data_mapped(mesh, cache->bm, mr, flags_data); } else { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); } /* Make sure updated data is re-uploaded. 
*/ @@ -801,15 +808,15 @@ struct DRWCacheBuildingContext { }; static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context, - const int num_vertices, + const int num_verts, const int num_edges, const int num_loops, - const int num_polygons, + const int num_polys, const int *subdiv_polygon_offset) { /* num_loops does not take into account meshes with only loose geometry, which might be meshes - * used as custom bone shapes, so let's check the num_vertices also. */ - if (num_vertices == 0 && num_loops == 0) { + * used as custom bone shapes, so let's check the num_verts also. */ + if (num_verts == 0 && num_loops == 0) { return false; } @@ -820,12 +827,12 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (num_loops != 0) { cache->num_subdiv_edges = (uint)num_edges; cache->num_subdiv_loops = (uint)num_loops; - cache->num_subdiv_verts = (uint)num_vertices; - cache->num_subdiv_quads = (uint)num_polygons; + cache->num_subdiv_verts = (uint)num_verts; + cache->num_subdiv_quads = (uint)num_polys; cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset)); } - cache->may_have_loose_geom = num_vertices != 0 || num_edges != 0; + cache->may_have_loose_geom = num_verts != 0 || num_edges != 0; /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after * it was sent to the device, since we may use the data while building other buffers on the CPU @@ -876,7 +883,7 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (cache->num_subdiv_verts) { ctx->vert_origindex_map = static_cast<int *>( MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map")); - for (int i = 0; i < num_vertices; i++) { + for (int i = 0; i < num_verts; i++) { ctx->vert_origindex_map[i] = -1; } } @@ -1089,6 +1096,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } /* Only build polygon related data if we have polygons. 
*/ + const Span<MPoly> polys = mesh_eval->polys(); if (cache->num_subdiv_loops != 0) { /* Build buffers for the PatchMap. */ draw_patch_map_build(&cache->gpu_patch_map, subdiv); @@ -1102,7 +1110,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, GPU_vertbuf_get_data(cache->fdots_patch_coords); for (int i = 0; i < mesh_eval->totpoly; i++) { const int ptex_face_index = cache->face_ptex_offset[i]; - if (mesh_eval->mpoly[i].totloop == 4) { + if (polys[i].totloop == 4) { /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. */ blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f); } @@ -1115,16 +1123,16 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer( - cache->subdiv_polygon_offset, mesh_eval->totpoly); + cache->subdiv_polygon_offset, polys.size()); cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset, - mesh_eval->totpoly + 1); + polys.size() + 1); build_vertex_face_adjacency_maps(cache); } cache->resolution = to_mesh_settings.resolution; - cache->num_coarse_poly = mesh_eval->totpoly; + cache->num_coarse_poly = polys.size(); /* To avoid floating point precision issues when evaluating patches at patch boundaries, * ensure that all loops sharing a vertex use the same patch coordinate. This could cause @@ -1204,8 +1212,8 @@ struct DRWSubdivUboStorage { * of out of bond accesses as compute dispatch are of fixed size. 
*/ uint total_dispatch_size; - int _pad0; - int _pad2; + int is_edit_mode; + int use_hide; int _pad3; }; @@ -1236,6 +1244,8 @@ static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache, ubo->coarse_face_hidden_mask = SUBDIV_COARSE_FACE_FLAG_HIDDEN_MASK; ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK; ubo->total_dispatch_size = total_dispatch_size; + ubo->is_edit_mode = cache->is_edit_mode; + ubo->use_hide = cache->use_hide; } static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache, @@ -1468,6 +1478,11 @@ void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, { GPUShader *shader = nullptr; + if (!draw_subdiv_cache_need_polygon_data(cache)) { + /* Happens on meshes with only loose geometry. */ + return; + } + if (dimensions == 1) { shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D, "#define SUBDIV_POLYGON_OFFSET\n" @@ -1953,17 +1968,19 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, return; } + const blender::VArraySpan<int> material_indices = mesh_eval->attributes().lookup_or_default<int>( + "material_index", ATTR_DOMAIN_FACE, 0); + /* Count number of subdivided polygons for each material. */ int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start")); int *subdiv_polygon_offset = cache->subdiv_polygon_offset; /* TODO: parallel_reduce? */ for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; const int next_offset = (i == mesh_eval->totpoly - 1) ? 
number_of_quads : subdiv_polygon_offset[i + 1]; const int quad_count = next_offset - subdiv_polygon_offset[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; mat_start[mat_index] += quad_count; } @@ -1982,8 +1999,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset")); for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; const int single_material_index = subdiv_polygon_offset[i]; const int material_offset = mat_end[mat_index]; const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads : @@ -2004,7 +2020,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, static bool draw_subdiv_create_requested_buffers(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2012,6 +2028,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide, OpenSubdiv_EvaluatorCache *evaluator_cache) @@ -2038,7 +2055,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, draw_subdiv_invalidate_evaluator_for_orco(subdiv, mesh_eval); if (!BKE_subdiv_eval_begin_from_mesh( - subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) { + subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GPU, evaluator_cache)) { /* This could happen in two situations: * - OpenSubdiv is disabled. 
* - Something totally bad happened, and OpenSubdiv rejected our @@ -2055,9 +2072,8 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, return false; } - /* Edges which do not come from coarse edges should not be drawn in edit mode, only in object - * mode when optimal display in turned off. */ - const bool optimal_display = runtime_data->use_optimal_display || is_editmode; + /* Edges which do not come from coarse edges should not be drawn in edit cage mode. */ + const bool optimal_display = runtime_data->use_optimal_display || (is_editmode && !do_cage); draw_cache->bm = bm; draw_cache->mesh = mesh_eval; @@ -2083,6 +2099,12 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, MeshRenderData *mr = mesh_render_data_create( ob, mesh, is_editmode, is_paint_mode, is_mode_active, obmat, do_final, do_uvedit, ts); mr->use_hide = use_hide; + draw_cache->use_hide = use_hide; + + /* Used for setting loop normals flags. Mapped extraction is only used during edit mode. + * See comments in #extract_lnor_iter_poly_mesh. + */ + draw_cache->is_edit_mode = mr->edit_bmesh != nullptr; draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval, mr); @@ -2134,9 +2156,20 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac int subd_vert_offset = 0; /* Subdivide each loose coarse edge. */ + const Span<MVert> coarse_verts = coarse_mesh->verts(); + const Span<MEdge> coarse_edges = coarse_mesh->edges(); + + int *vert_to_edge_buffer; + MeshElemMap *vert_to_edge_map; + BKE_mesh_vert_edge_map_create(&vert_to_edge_map, + &vert_to_edge_buffer, + coarse_edges.data(), + coarse_mesh->totvert, + coarse_edges.size()); + for (int i = 0; i < coarse_loose_edge_len; i++) { const int coarse_edge_index = cache->loose_geom.edges[i]; - const MEdge *coarse_edge = &coarse_mesh->medge[cache->loose_geom.edges[i]]; + const MEdge *coarse_edge = &coarse_edges[cache->loose_geom.edges[i]]; /* Perform interpolation of each vertex. 
*/ for (int i = 0; i < resolution - 1; i++, subd_edge_offset++) { @@ -2147,8 +2180,13 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v1 = loose_subd_verts[subd_vert_offset]; subd_v1.coarse_vertex_index = (i == 0) ? coarse_edge->v1 : -1u; const float u1 = i * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u1, subd_v1.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u1, + subd_v1.co); subd_edge.loose_subdiv_v1_index = subd_vert_offset++; @@ -2156,17 +2194,25 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v2 = loose_subd_verts[subd_vert_offset]; subd_v2.coarse_vertex_index = ((i + 1) == resolution - 1) ? coarse_edge->v2 : -1u; const float u2 = (i + 1) * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u2, subd_v2.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u2, + subd_v2.co); subd_edge.loose_subdiv_v2_index = subd_vert_offset++; } } + MEM_freeN(vert_to_edge_buffer); + MEM_freeN(vert_to_edge_map); + /* Copy the remaining loose_verts. 
*/ for (int i = 0; i < coarse_loose_vert_len; i++) { const int coarse_vertex_index = cache->loose_geom.verts[i]; - const MVert &coarse_vertex = coarse_mesh->mvert[coarse_vertex_index]; + const MVert &coarse_vertex = coarse_verts[coarse_vertex_index]; DRWSubdivLooseVertex &subd_v = loose_subd_verts[subd_vert_offset++]; subd_v.coarse_vertex_index = cache->loose_geom.verts[i]; @@ -2195,7 +2241,7 @@ static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr; void DRW_create_subdivision(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2203,11 +2249,12 @@ void DRW_create_subdivision(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide) { if (g_evaluator_cache == nullptr) { - g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE); + g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GPU); } #undef TIME_SUBDIV @@ -2226,6 +2273,7 @@ void DRW_create_subdivision(Object *ob, obmat, do_final, do_uvedit, + do_cage, ts, use_hide, g_evaluator_cache)) { diff --git a/source/blender/draw/intern/draw_color_management.cc b/source/blender/draw/intern/draw_color_management.cc index bb11f1ab3ad..eab86226be5 100644 --- a/source/blender/draw/intern/draw_color_management.cc +++ b/source/blender/draw/intern/draw_color_management.cc @@ -169,7 +169,7 @@ void DRW_transform_none(GPUTexture *tex) /* Draw as texture for final render (without immediate mode). 
*/ GPUBatch *geom = DRW_cache_fullscreen_quad_get(); - GPU_batch_program_set_builtin(geom, GPU_SHADER_2D_IMAGE_COLOR); + GPU_batch_program_set_builtin(geom, GPU_SHADER_3D_IMAGE_COLOR); GPU_batch_uniform_4f(geom, "color", 1.0f, 1.0f, 1.0f, 1.0f); GPU_batch_texture_bind(geom, "image", tex); diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc new file mode 100644 index 00000000000..ff69885b3b6 --- /dev/null +++ b/source/blender/draw/intern/draw_command.cc @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_command.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +#include <bitset> +#include <sstream> + +namespace blender::draw::command { + +/* -------------------------------------------------------------------- */ +/** \name Commands Execution + * \{ */ + +void ShaderBind::execute(RecordingState &state) const +{ + if (assign_if_different(state.shader, shader)) { + GPU_shader_bind(shader); + } +} + +void ResourceBind::execute() const +{ + if (slot == -1) { + return; + } + switch (type) { + case ResourceBind::Type::Sampler: + GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false); + break; + case ResourceBind::Type::Image: + GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot); + break; + case ResourceBind::Type::UniformBuf: + GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot); + break; + case ResourceBind::Type::StorageBuf: + GPU_storagebuf_bind(is_reference ? 
*storage_buf_ref : storage_buf, slot); + break; + } +} + +void PushConstant::execute(RecordingState &state) const +{ + if (location == -1) { + return; + } + switch (type) { + case PushConstant::Type::IntValue: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value); + break; + case PushConstant::Type::IntReference: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref); + break; + case PushConstant::Type::FloatValue: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value); + break; + case PushConstant::Type::FloatReference: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref); + break; + } +} + +void Draw::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); + } + + GPU_batch_set_shader(batch, state.shader); + GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len); +} + +void DrawMulti::execute(RecordingState &state) const +{ + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &group = groups[group_index]; + + if (group.vertex_len > 0) { + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(group.gpu_batch, state.resource_id_buf); + } + + GPU_batch_set_shader(group.gpu_batch, state.shader); + + constexpr intptr_t stride = sizeof(DrawCommand); + /* We have 2 indirect command reserved per draw group. */ + intptr_t offset = stride * group_index * 2; + + /* Draw negatively scaled geometry first. 
*/ + if (group.len - group.front_facing_len > 0) { + state.front_facing_set(true); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset); + } + + if (group.front_facing_len > 0) { + state.front_facing_set(false); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride); + } + } + + group_index = group.next; + } +} + +void DrawIndirect::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + GPU_batch_draw_indirect(batch, *indirect_buf, 0); +} + +void Dispatch::execute(RecordingState &state) const +{ + if (is_reference) { + GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + } + else { + GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + } +} + +void DispatchIndirect::execute(RecordingState &state) const +{ + GPU_compute_dispatch_indirect(state.shader, *indirect_buf); +} + +void Barrier::execute() const +{ + GPU_memory_barrier(type); +} + +void Clear::execute() const +{ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil); +} + +void StateSet::execute(RecordingState &recording_state) const +{ + /** + * Does not support locked state for the moment and never should. + * Better implement a less hacky selection! + */ + BLI_assert(DST.state_lock == 0); + + if (!assign_if_different(recording_state.pipeline_state, new_state)) { + return; + } + + /* Keep old API working. Keep the state tracking in sync. */ + /* TODO(fclem): Move at the end of a pass. */ + DST.state = new_state; + + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); + + if (new_state & DRW_STATE_SHADOW_OFFSET) { + GPU_shadow_offset(true); + } + else { + GPU_shadow_offset(false); + } + + /* TODO: this should be part of shader state. 
*/ + if (new_state & DRW_STATE_CLIP_PLANES) { + GPU_clip_distances(recording_state.view_clip_plane_count); + } + else { + GPU_clip_distances(0); + } + + if (new_state & DRW_STATE_IN_FRONT_SELECT) { + /* XXX `GPU_depth_range` is not a perfect solution + * since very distant geometries can still be occluded. + * Also the depth test precision of these geometries is impaired. + * However, it solves the selection for the vast majority of cases. */ + GPU_depth_range(0.0f, 0.01f); + } + else { + GPU_depth_range(0.0f, 1.0f); + } + + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { + GPU_program_point_size(true); + } + else { + GPU_program_point_size(false); + } +} + +void StencilSet::execute() const +{ + GPU_stencil_write_mask_set(write_mask); + GPU_stencil_compare_mask_set(compare_mask); + GPU_stencil_reference_set(reference); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands Serialization for debugging + * \{ */ + +std::string ShaderBind::serialize() const +{ + return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")"; +} + +std::string ResourceBind::serialize() const +{ + switch (type) { + case Type::Sampler: + return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + + (sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")"; + case Type::Image: + return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::UniformBuf: + return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::StorageBuf: + return std::string(".bind_storage_buf") + (is_reference ? 
"_ref" : "") + "(" + + std::to_string(slot) + ")"; + default: + BLI_assert_unreachable(); + return ""; + } +} + +std::string PushConstant::serialize() const +{ + std::stringstream ss; + for (int i = 0; i < array_len; i++) { + switch (comp_len) { + case 1: + switch (type) { + case Type::IntValue: + ss << int1_value; + break; + case Type::IntReference: + ss << int_ref[i]; + break; + case Type::FloatValue: + ss << float1_value; + break; + case Type::FloatReference: + ss << float_ref[i]; + break; + } + break; + case 2: + switch (type) { + case Type::IntValue: + ss << int2_value; + break; + case Type::IntReference: + ss << int2_ref[i]; + break; + case Type::FloatValue: + ss << float2_value; + break; + case Type::FloatReference: + ss << float2_ref[i]; + break; + } + break; + case 3: + switch (type) { + case Type::IntValue: + ss << int3_value; + break; + case Type::IntReference: + ss << int3_ref[i]; + break; + case Type::FloatValue: + ss << float3_value; + break; + case Type::FloatReference: + ss << float3_ref[i]; + break; + } + break; + case 4: + switch (type) { + case Type::IntValue: + ss << int4_value; + break; + case Type::IntReference: + ss << int4_ref[i]; + break; + case Type::FloatValue: + ss << float4_value; + break; + case Type::FloatReference: + ss << float4_ref[i]; + break; + } + break; + case 16: + switch (type) { + case Type::IntValue: + case Type::IntReference: + BLI_assert_unreachable(); + break; + case Type::FloatValue: + ss << *reinterpret_cast<const float4x4 *>(&float4_value); + break; + case Type::FloatReference: + ss << *float4x4_ref; + break; + } + break; + } + if (i < array_len - 1) { + ss << ", "; + } + } + + return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")"; +} + +std::string Draw::serialize() const +{ + std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len); + std::string vert_len = (vertex_len == (uint)-1) ? 
"from_batch" : std::to_string(vertex_len); + std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" : + std::to_string(vertex_first); + return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len + + ", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) + + ")"; +} + +std::string DrawMulti::serialize(std::string line_prefix) const +{ + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(), + multi_draw_buf->prototype_count_); + + /* This emulates the GPU sorting but without the unstable draw order. */ + std::sort( + prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) { + return (a.group_id < b.group_id) || + (a.group_id == b.group_id && a.resource_handle > b.resource_handle); + }); + + /* Compute prefix sum to have correct offsets. */ + uint prefix_sum = 0u; + for (DrawGroup &group : groups) { + group.start = prefix_sum; + prefix_sum += group.front_proto_len + group.back_proto_len; + } + + std::stringstream ss; + + uint group_len = 0; + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")"; + + intptr_t offset = grp.start; + + if (grp.back_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) { + BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)"; + } + offset += grp.back_proto_len; + } + + if (grp.front_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) { + 
BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(!handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)"; + } + } + + group_index = grp.next; + group_len++; + } + + ss << std::endl; + + return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str(); +} + +std::string DrawIndirect::serialize() const +{ + return std::string(".draw_indirect()"); +} + +std::string Dispatch::serialize() const +{ + int3 sz = is_reference ? *size_ref : size; + return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) + + ", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")"; +} + +std::string DispatchIndirect::serialize() const +{ + return std::string(".dispatch_indirect()"); +} + +std::string Barrier::serialize() const +{ + /* TODO(@fclem): Better serialization... */ + return std::string(".barrier(") + std::to_string(type) + ")"; +} + +std::string Clear::serialize() const +{ + std::stringstream ss; + if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) { + ss << "color=" << color; + if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) { + ss << "depth=" << depth; + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << "stencil=0b" << std::bitset<8>(stencil) << ")"; + } + return std::string(".clear(") + ss.str() + ")"; +} + +std::string StateSet::serialize() const +{ + /* TODO(@fclem): Better serialization... 
*/ + return std::string(".state_set(") + std::to_string(new_state) + ")"; +} + +std::string StencilSet::serialize() const +{ + std::stringstream ss; + ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b" + << std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference); + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands buffers binding / command / resource ID generation + * \{ */ + +void DrawCommandBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands) +{ + UNUSED_VARS(headers, commands); + + resource_id_count_ = 0; + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.index].draw; + + int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get( + cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len); + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + + if (cmd.vertex_len == (uint)-1) { + cmd.vertex_len = batch_vert_len; + } + + if (cmd.handle.raw > 0) { + /* Save correct offset to start of resource_id buffer region for this draw. */ + uint instance_first = resource_id_count_; + resource_id_count_ += cmd.instance_len; + /* Ensure the buffer is big enough. */ + resource_id_buf_.get_or_resize(resource_id_count_ - 1); + + /* Copy the resource id for all instances. 
*/ + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf_[i] = index; + } + } + } + + resource_id_buf_.push_update(); + + if (GPU_shader_draw_parameters_support() == false) { + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + } +} + +void DrawMultiBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf) +{ + UNUSED_VARS(headers, commands); + + GPU_debug_group_begin("DrawMultiBuf.bind"); + + resource_id_count_ = 0u; + for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) { + /* Compute prefix sum of all instance of previous group. */ + group.start = resource_id_count_; + resource_id_count_ += group.len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, + &group.vertex_len, + &group.vertex_first, + &group.base_index, + &batch_inst_len); + + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + + /* Now that we got the batch information, we can set the counters to 0. */ + group.total_counter = group.front_facing_counter = group.back_facing_counter = 0; + } + + group_buf_.push_update(); + prototype_buf_.push_update(); + /* Allocate enough for the expansion pass. */ + resource_id_buf_.get_or_resize(resource_id_count_); + /* Two command per group. 
*/ + command_buf_.get_or_resize(group_count_ * 2); + + if (prototype_count_ > 0) { + GPUShader *shader = DRW_shader_draw_command_generate_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_); + GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf")); + GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf")); + GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf")); + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1); + if (GPU_shader_draw_parameters_support() == false) { + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + } + + GPU_debug_group_end(); +} + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..46a9199a267 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. 
+ */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. 
*/ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), 
texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : 
location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) 
const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. 
+ * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. 
The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. 
*/ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + UNUSED_VARS_NDEBUG(vertex_len, vertex_first); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? 
group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh new file mode 100644 index 00000000000..9fbbe23f0ce --- /dev/null +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#ifndef GPU_SHADER +# include "BLI_span.hh" +# include "GPU_shader_shared_utils.h" + +namespace blender::draw::command { + +struct RecordingState; + +#endif + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw + * \{ */ + +/** + * A DrawGroup allow to split the command stream into batch-able chunks of commands with + * the same render state. + */ +struct DrawGroup { + /** Index of next #DrawGroup from the same header. */ + uint next; + + /** Index of the first instances after sorting. */ + uint start; + /** Total number of instances (including inverted facing). Needed to issue the draw call. */ + uint len; + /** Number of non inverted scaling instances in this Group. */ + uint front_facing_len; + + /** #GPUBatch values to be copied to #DrawCommand after sorting (if not overridden). */ + int vertex_len; + int vertex_first; + int base_index; + + /** Atomic counters used during command sorting. */ + uint total_counter; + +#ifndef GPU_SHADER + /* NOTE: Union just to make sure the struct has always the same size on all platform. */ + union { + struct { + /** For debug printing only. 
*/ + uint front_proto_len; + uint back_proto_len; + /** Needed to create the correct draw call. */ + GPUBatch *gpu_batch; + }; + struct { +#endif + uint front_facing_counter; + uint back_facing_counter; + uint _pad0, _pad1; +#ifndef GPU_SHADER + }; + }; +#endif +}; +BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16) + +/** + * Representation of a future draw call inside a DrawGroup. This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. + */ +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also reference visibility. */ + uint resource_handle; + /* Number of instances. */ + uint instance_len; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16) + +/** \} */ + +#ifndef GPU_SHADER +}; // namespace blender::draw::command +#endif diff --git a/source/blender/draw/intern/draw_common.c b/source/blender/draw/intern/draw_common.c index 0f330dbb519..c1b4c3c1f81 100644 --- a/source/blender/draw/intern/draw_common.c +++ b/source/blender/draw/intern/draw_common.c @@ -280,10 +280,11 @@ int DRW_object_wire_theme_get(Object *ob, ViewLayer *view_layer, float **r_color { const DRWContextState *draw_ctx = DRW_context_state_get(); const bool is_edit = (draw_ctx->object_mode & OB_MODE_EDIT) && (ob->mode & OB_MODE_EDIT); - const bool active = view_layer->basact && - ((ob->base_flag & BASE_FROM_DUPLI) ? - (DRW_object_get_dupli_parent(ob) == view_layer->basact->object) : - (view_layer->basact->object == ob)); + BKE_view_layer_synced_ensure(draw_ctx->scene, view_layer); + const Base *base = BKE_view_layer_active_base_get(view_layer); + const bool active = base && ((ob->base_flag & BASE_FROM_DUPLI) ? 
+ (DRW_object_get_dupli_parent(ob) == base->object) : + (base->object == ob)); /* confusing logic here, there are 2 methods of setting the color * 'colortab[colindex]' and 'theme_id', colindex overrides theme_id. @@ -417,7 +418,6 @@ bool DRW_object_is_flat(Object *ob, int *r_axis) OB_CURVES_LEGACY, OB_SURF, OB_FONT, - OB_MBALL, OB_CURVES, OB_POINTCLOUD, OB_VOLUME)) { diff --git a/source/blender/draw/intern/draw_common_shader_shared.h b/source/blender/draw/intern/draw_common_shader_shared.h index c9819d9da87..57cb7880ce6 100644 --- a/source/blender/draw/intern/draw_common_shader_shared.h +++ b/source/blender/draw/intern/draw_common_shader_shared.h @@ -19,7 +19,7 @@ typedef struct GlobalsUboStorage GlobalsUboStorage; #define UBO_LAST_COLOR color_uv_shadow /* Used as ubo but colors can be directly referenced as well */ -/* NOTE: Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. */ +/* \note Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. 
*/ struct GlobalsUboStorage { /* UBOs data needs to be 16 byte aligned (size of vec4) */ float4 color_wire; diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc index c40f2275968..a61769e7a63 100644 --- a/source/blender/draw/intern/draw_curves.cc +++ b/source/blender/draw/intern/draw_curves.cc @@ -33,25 +33,17 @@ #include "draw_manager.h" #include "draw_shader.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct CurvesEvalCall { struct CurvesEvalCall *next; GPUVertBuf *vbo; @@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data) CurvesUniformBufPool *pool = drw_data->curves_ubos; pool->reset(); -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); -#else - g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); + } + else { + g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint 
dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, { GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); - pr_call->next = g_tf_calls; - pr_call->vbo = vbo; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); + pr_call->next = g_tf_calls; + pr_call->vbo = vbo; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, 
final_points_len); @@ -246,13 +242,14 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, c } } -static CurvesEvalCache *drw_curves_cache_get(Object *object, +static CurvesEvalCache *drw_curves_cache_get(Curves &curves, GPUMaterial *gpu_material, int subdiv, int thickness_res) { CurvesEvalCache *cache; - bool update = curves_ensure_procedural_data(object, &cache, gpu_material, subdiv, thickness_res); + const bool update = curves_ensure_procedural_data( + &curves, &cache, gpu_material, subdiv, thickness_res); if (update) { if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { @@ -268,12 +265,13 @@ static CurvesEvalCache *drw_curves_cache_get(Object *object, GPUVertBuf *DRW_curves_pos_buffer_get(Object *object) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; - CurvesEvalCache *cache = drw_curves_cache_get(object, nullptr, subdiv, thickness_res); + Curves &curves = *static_cast<Curves *>(object->data); + CurvesEvalCache *cache = drw_curves_cache_get(curves, nullptr, subdiv, thickness_res); return cache->final[subdiv].proc_buf; } @@ -303,15 +301,16 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, GPUMaterial *gpu_material) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; CurvesUniformBufPool *pool = DST.vmempool->curves_ubos; CurvesInfosBuf &curves_infos = pool->alloc(); + Curves &curves_id = *static_cast<Curves *>(object->data); - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 
1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; CurvesEvalCache *curves_cache = drw_curves_cache_get( - object, gpu_material, subdiv, thickness_res); + curves_id, gpu_material, subdiv, thickness_res); DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); @@ -330,13 +329,10 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, /* Use the radius of the root and tip of the first curve for now. This is a workaround that we * use for now because we can't use a per-point radius yet. */ - Curves &curves_id = *static_cast<Curves *>(object->data); const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap( curves_id.geometry); if (curves.curves_num() >= 1) { - CurveComponent curves_component; - curves_component.replace(&curves_id, GeometryOwnershipType::ReadOnly); - blender::VArray<float> radii = curves_component.attribute_get_for_read( + blender::VArray<float> radii = curves.attributes().lookup_or_default( "radius", ATTR_DOMAIN_POINT, 0.005f); const blender::IndexRange first_curve_points = curves.points_for_curve(0); const float first_radius = radii[first_curve_points.first()]; @@ -383,7 +379,7 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, * attributes. */ const int index = attribute_index_in_material(gpu_material, request.attribute_name); if (index != -1) { - curves_infos.is_point_attribute[index] = request.domain == ATTR_DOMAIN_POINT; + curves_infos.is_point_attribute[index][0] = request.domain == ATTR_DOMAIN_POINT; } } @@ -411,82 +407,118 @@ void DRW_curves_update() /* Update legacy hair too, to avoid verbosity in callers. */ DRW_hair_update(); -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). 
- * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. 
*/ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = static_cast<float *>( + MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + CurvesEvalCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. 
*/ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = static_cast<float *>( - MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - CurvesEvalCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Frame-buffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. 
*/ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary frame-buffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_curves_free() diff --git a/source/blender/draw/intern/draw_curves_private.h b/source/blender/draw/intern/draw_curves_private.h index 7d54e1089d6..31122ed5248 100644 --- a/source/blender/draw/intern/draw_curves_private.h +++ b/source/blender/draw/intern/draw_curves_private.h @@ -16,6 +16,12 @@ extern "C" { #endif +struct Curves; +struct GPUVertBuf; +struct GPUIndexBuf; +struct GPUBatch; +struct GPUTexture; + #define MAX_THICKRES 2 /* see eHairType */ #define MAX_HAIR_SUBDIV 4 /* see hair_subdiv rna */ @@ -25,11 +31,6 @@ typedef enum CurvesEvalShader { } CurvesEvalShader; #define CURVES_EVAL_SHADER_NUM 3 -struct GPUVertBuf; -struct GPUIndexBuf; -struct GPUBatch; -struct GPUTexture; - typedef struct CurvesEvalFinalCache { /* Output of the subdivision stage: vertex buffer sized to subdiv level. 
*/ GPUVertBuf *proc_buf; @@ -95,7 +96,7 @@ typedef struct CurvesEvalCache { /** * Ensure all necessary textures and buffers exist for GPU accelerated drawing. */ -bool curves_ensure_procedural_data(struct Object *object, +bool curves_ensure_procedural_data(struct Curves *curves, struct CurvesEvalCache **r_hair_cache, struct GPUMaterial *gpu_material, int subdiv, diff --git a/source/blender/draw/intern/draw_debug.c b/source/blender/draw/intern/draw_debug.c deleted file mode 100644 index b568119627e..00000000000 --- a/source/blender/draw/intern/draw_debug.c +++ /dev/null @@ -1,196 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2018 Blender Foundation. */ - -/** \file - * \ingroup draw - * - * \brief Simple API to draw debug shapes in the viewport. - */ - -#include "MEM_guardedalloc.h" - -#include "DNA_object_types.h" - -#include "BKE_object.h" - -#include "BLI_link_utils.h" - -#include "GPU_immediate.h" -#include "GPU_matrix.h" - -#include "draw_debug.h" -#include "draw_manager.h" - -/* --------- Register --------- */ - -/* Matrix applied to all points before drawing. Could be a stack if needed. 
*/ -static float g_modelmat[4][4]; - -void DRW_debug_modelmat_reset(void) -{ - unit_m4(g_modelmat); -} - -void DRW_debug_modelmat(const float modelmat[4][4]) -{ - copy_m4_m4(g_modelmat, modelmat); -} - -void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) -{ - DRWDebugLine *line = MEM_mallocN(sizeof(DRWDebugLine), "DRWDebugLine"); - mul_v3_m4v3(line->pos[0], g_modelmat, v1); - mul_v3_m4v3(line->pos[1], g_modelmat, v2); - copy_v4_v4(line->color, color); - BLI_LINKS_PREPEND(DST.debug.lines, line); -} - -void DRW_debug_polygon_v3(const float (*v)[3], const int vert_len, const float color[4]) -{ - BLI_assert(vert_len > 1); - - for (int i = 0; i < vert_len; i++) { - DRW_debug_line_v3v3(v[i], v[(i + 1) % vert_len], color); - } -} - -void DRW_debug_m4(const float m[4][4]) -{ - float v0[3] = {0.0f, 0.0f, 0.0f}; - float v1[3] = {1.0f, 0.0f, 0.0f}; - float v2[3] = {0.0f, 1.0f, 0.0f}; - float v3[3] = {0.0f, 0.0f, 1.0f}; - - mul_m4_v3(m, v0); - mul_m4_v3(m, v1); - mul_m4_v3(m, v2); - mul_m4_v3(m, v3); - - DRW_debug_line_v3v3(v0, v1, (float[4]){1.0f, 0.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v2, (float[4]){0.0f, 1.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v3, (float[4]){0.0f, 0.0f, 1.0f, 1.0f}); -} - -void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) -{ - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[1], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[2], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[3], color); - DRW_debug_line_v3v3(bbox->vec[3], bbox->vec[0], color); - - DRW_debug_line_v3v3(bbox->vec[4], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[5], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[6], bbox->vec[7], color); - DRW_debug_line_v3v3(bbox->vec[7], bbox->vec[4], color); - - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[4], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[3], 
bbox->vec[7], color); -} - -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], const bool invert) -{ - BoundBox bb; - const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; - float project_matrix[4][4]; - if (invert) { - invert_m4_m4(project_matrix, m); - } - else { - copy_m4_m4(project_matrix, m); - } - - BKE_boundbox_init_from_minmax(&bb, min, max); - for (int i = 0; i < 8; i++) { - mul_project_m4_v3(project_matrix, bb.vec[i]); - } - DRW_debug_bbox(&bb, color); -} - -void DRW_debug_sphere(const float center[3], const float radius, const float color[4]) -{ - float size_mat[4][4]; - DRWDebugSphere *sphere = MEM_mallocN(sizeof(DRWDebugSphere), "DRWDebugSphere"); - /* Bake all transform into a Matrix4 */ - scale_m4_fl(size_mat, radius); - copy_m4_m4(sphere->mat, g_modelmat); - translate_m4(sphere->mat, center[0], center[1], center[2]); - mul_m4_m4m4(sphere->mat, sphere->mat, size_mat); - - copy_v4_v4(sphere->color, color); - BLI_LINKS_PREPEND(DST.debug.spheres, sphere); -} - -/* --------- Render --------- */ - -static void drw_debug_draw_lines(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.lines); - - if (count == 0) { - return; - } - - GPUVertFormat *vert_format = immVertexFormat(); - uint pos = GPU_vertformat_attr_add(vert_format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - uint col = GPU_vertformat_attr_add(vert_format, "color", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - - immBindBuiltinProgram(GPU_SHADER_3D_FLAT_COLOR); - - immBegin(GPU_PRIM_LINES, count * 2); - - while (DST.debug.lines) { - void *next = DST.debug.lines->next; - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[0]); - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[1]); - - MEM_freeN(DST.debug.lines); - DST.debug.lines = next; - } - immEnd(); - - immUnbindProgram(); -} - -static void drw_debug_draw_spheres(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.spheres); - 
- if (count == 0) { - return; - } - - float persmat[4][4]; - DRW_view_persmat_get(NULL, persmat, false); - - GPUBatch *empty_sphere = DRW_cache_empty_sphere_get(); - GPU_batch_program_set_builtin(empty_sphere, GPU_SHADER_3D_UNIFORM_COLOR); - while (DST.debug.spheres) { - void *next = DST.debug.spheres->next; - float MVP[4][4]; - - mul_m4_m4m4(MVP, persmat, DST.debug.spheres->mat); - GPU_batch_uniform_mat4(empty_sphere, "ModelViewProjectionMatrix", MVP); - GPU_batch_uniform_4fv(empty_sphere, "color", DST.debug.spheres->color); - GPU_batch_draw(empty_sphere); - - MEM_freeN(DST.debug.spheres); - DST.debug.spheres = next; - } -} - -void drw_debug_draw(void) -{ - drw_debug_draw_lines(); - drw_debug_draw_spheres(); -} - -void drw_debug_init(void) -{ - DRW_debug_modelmat_reset(); -} diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc new file mode 100644 index 00000000000..b0662a42ea0 --- /dev/null +++ b/source/blender/draw/intern/draw_debug.cc @@ -0,0 +1,736 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2018 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + */ + +#include "BKE_object.h" +#include "BLI_link_utils.h" +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_debug.h" + +#include "draw_debug.h" +#include "draw_debug.hh" +#include "draw_manager.h" +#include "draw_shader.h" +#include "draw_shader_shared.h" + +#include <iomanip> + +#ifdef DEBUG +# define DRAW_DEBUG +#else +/* Uncomment to forcibly enable debug draw in release mode. 
*/ +//#define DRAW_DEBUG +#endif + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name Init and state + * \{ */ + +DebugDraw::DebugDraw() +{ + constexpr int circle_resolution = 16; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(circle_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(circle_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + sphere_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } + + constexpr int point_resolution = 4; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(point_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(point_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + point_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } +}; + +void DebugDraw::init() +{ + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; + gpu_print_buf_used = false; + + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; + gpu_draw_buf_used = false; + + modelmat_reset(); +} + +void DebugDraw::modelmat_reset() +{ + model_mat_ = float4x4::identity(); +} + +void DebugDraw::modelmat_set(const float modelmat[4][4]) +{ + 
model_mat_ = modelmat; +} + +GPUStorageBuf *DebugDraw::gpu_draw_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_draw_buf_used) { + gpu_draw_buf_used = true; + gpu_draw_buf_.push_update(); + } + return gpu_draw_buf_; +} + +GPUStorageBuf *DebugDraw::gpu_print_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_print_buf_used) { + gpu_print_buf_used = true; + gpu_print_buf_.push_update(); + } + return gpu_print_buf_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw functions + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, float4 color) +{ + draw_line(v1, v2, color_pack(color)); +} + +void DebugDraw::draw_polygon(Span<float3> poly_verts, float4 color) +{ + BLI_assert(!poly_verts.is_empty()); + + uint col = color_pack(color); + float3 v0 = model_mat_ * poly_verts.last(); + for (auto vert : poly_verts) { + float3 v1 = model_mat_ * vert; + draw_line(v0, v1, col); + v0 = v1; + } +} + +void DebugDraw::draw_matrix(const float4x4 m4) +{ + float3 v0 = float3(0.0f, 0.0f, 0.0f); + float3 v1 = float3(1.0f, 0.0f, 0.0f); + float3 v2 = float3(0.0f, 1.0f, 0.0f); + float3 v3 = float3(0.0f, 0.0f, 1.0f); + + mul_project_m4_v3(m4.ptr(), v0); + mul_project_m4_v3(m4.ptr(), v1); + mul_project_m4_v3(m4.ptr(), v2); + mul_project_m4_v3(m4.ptr(), v3); + + draw_line(v0, v1, float4(1.0f, 0.0f, 0.0f, 1.0f)); + draw_line(v0, v2, float4(0.0f, 1.0f, 0.0f, 1.0f)); + draw_line(v0, v3, float4(0.0f, 0.0f, 1.0f, 1.0f)); +} + +void DebugDraw::draw_bbox(const BoundBox &bbox, const float4 color) +{ + uint col = color_pack(color); + draw_line(bbox.vec[0], bbox.vec[1], col); + draw_line(bbox.vec[1], bbox.vec[2], col); + draw_line(bbox.vec[2], bbox.vec[3], col); + draw_line(bbox.vec[3], bbox.vec[0], col); + + draw_line(bbox.vec[4], bbox.vec[5], col); + draw_line(bbox.vec[5], bbox.vec[6], col); + draw_line(bbox.vec[6], bbox.vec[7], col); + draw_line(bbox.vec[7], bbox.vec[4], 
col); + + draw_line(bbox.vec[0], bbox.vec[4], col); + draw_line(bbox.vec[1], bbox.vec[5], col); + draw_line(bbox.vec[2], bbox.vec[6], col); + draw_line(bbox.vec[3], bbox.vec[7], col); +} + +void DebugDraw::draw_matrix_as_bbox(float4x4 mat, const float4 color) +{ + BoundBox bb; + const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; + BKE_boundbox_init_from_minmax(&bb, min, max); + for (auto i : IndexRange(8)) { + mul_project_m4_v3(mat.ptr(), bb.vec[i]); + } + draw_bbox(bb, color); +} + +void DebugDraw::draw_sphere(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(sphere_verts_.size() / 2)) { + float3 v0 = sphere_verts_[i * 2] * radius + center; + float3 v1 = sphere_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +void DebugDraw::draw_point(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(point_verts_.size() / 2)) { + float3 v0 = point_verts_[i * 2] * radius + center; + float3 v1 = point_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Print functions + * \{ */ + +template<> void DebugDraw::print_value<uint>(const uint &value) +{ + print_value_uint(value, false, false, true); +} +template<> void DebugDraw::print_value<int>(const int &value) +{ + print_value_uint(uint(abs(value)), false, (value < 0), false); +} +template<> void DebugDraw::print_value<bool>(const bool &value) +{ + print_string(value ? 
"true " : "false"); +} +template<> void DebugDraw::print_value<float>(const float &val) +{ + std::stringstream ss; + ss << std::setw(12) << std::to_string(val); + print_string(ss.str()); +} +template<> void DebugDraw::print_value<double>(const double &val) +{ + print_value(float(val)); +} + +template<> void DebugDraw::print_value_hex<uint>(const uint &value) +{ + print_value_uint(value, true, false, false); +} +template<> void DebugDraw::print_value_hex<int>(const int &value) +{ + print_value_uint(uint(value), true, false, false); +} +template<> void DebugDraw::print_value_hex<float>(const float &value) +{ + print_value_uint(*reinterpret_cast<const uint *>(&value), true, false, false); +} +template<> void DebugDraw::print_value_hex<double>(const double &val) +{ + print_value_hex(float(val)); +} + +template<> void DebugDraw::print_value_binary<uint>(const uint &value) +{ + print_value_binary(value); +} +template<> void DebugDraw::print_value_binary<int>(const int &value) +{ + print_value_binary(uint(value)); +} +template<> void DebugDraw::print_value_binary<float>(const float &value) +{ + print_value_binary(*reinterpret_cast<const uint *>(&value)); +} +template<> void DebugDraw::print_value_binary<double>(const double &val) +{ + print_value_binary(float(val)); +} + +template<> void DebugDraw::print_value<float2>(const float2 &value) +{ + print_no_endl("float2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float3>(const float3 &value) +{ + print_no_endl("float3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float4>(const float4 &value) +{ + print_no_endl("float4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<int2>(const int2 &value) +{ + print_no_endl("int2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<int3>(const int3 &value) +{ + print_no_endl("int3(", value[0], ", ", value[1], ", ", value[1], 
")"); +} +template<> void DebugDraw::print_value<int4>(const int4 &value) +{ + print_no_endl("int4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<uint2>(const uint2 &value) +{ + print_no_endl("uint2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint3>(const uint3 &value) +{ + print_no_endl("uint3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint4>(const uint4 &value) +{ + print_no_endl("uint4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internals + * + * IMPORTANT: All of these are copied from the shader libs (common_debug_draw_lib.glsl & + * common_debug_print_lib.glsl). They need to be kept in sync to write the same data. + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, uint color) +{ + DebugDrawBuf &buf = cpu_draw_buf_; + uint index = buf.command.vertex_len; + if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { + buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); + buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); + buf.command.vertex_len += 2; + } +} + +/* Keep in sync with drw_debug_color_pack(). 
*/ +uint DebugDraw::color_pack(float4 color) +{ + color = math::clamp(color, 0.0f, 1.0f); + uint result = 0; + result |= uint(color.x * 255.0f) << 0u; + result |= uint(color.y * 255.0f) << 8u; + result |= uint(color.z * 255.0f) << 16u; + result |= uint(color.w * 255.0f) << 24u; + return result; +} + +DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) +{ + DRWDebugVert vert; + vert.pos0 = *reinterpret_cast<uint32_t *>(&pos.x); + vert.pos1 = *reinterpret_cast<uint32_t *>(&pos.y); + vert.pos2 = *reinterpret_cast<uint32_t *>(&pos.z); + vert.color = color; + return vert; +} + +void DebugDraw::print_newline() +{ + print_col_ = 0u; + print_row_ = ++cpu_print_buf_.command.instance_first_array; +} + +void DebugDraw::print_string_start(uint len) +{ + /* Break before word. */ + if (print_col_ + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } +} + +/* Copied from gpu_shader_dependency. */ +void DebugDraw::print_string(std::string str) +{ + size_t len_before_pad = str.length(); + /* Pad string to uint size to avoid out of bound reads. */ + while (str.length() % 4 != 0) { + str += " "; + } + + print_string_start(len_before_pad); + for (size_t i = 0; i < len_before_pad; i += 4) { + union { + uint8_t chars[4]; + uint32_t word; + }; + + chars[0] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 0); + chars[1] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 1); + chars[2] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 2); + chars[3] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 3); + + if (i + 4 > len_before_pad) { + chars[len_before_pad - i] = '\0'; + } + print_char4(word); + } +} + +/* Keep in sync with shader. */ +void DebugDraw::print_char4(uint data) +{ + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + /* NOTE: Do not skip the header manually like in GPU. 
*/ + uint cursor = cpu_print_buf_.command.vertex_len++; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = print_col_++; + uint print_header = (flags << 24u) | (print_row_ << 16u) | (col << 8u); + cpu_print_buf_.char_array[cursor] = print_header | char1; + /* Break word. */ + if (print_col_ > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } + } + } +} + +void DebugDraw::print_append_char(uint char1, uint &char4) +{ + char4 = (char4 << 8u) | char1; +} + +void DebugDraw::print_append_digit(uint digit, uint &char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void DebugDraw::print_append_space(uint &char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void DebugDraw::print_value_binary(uint value) +{ + print_string("0b"); + print_string_start(10u * 4u); + uint digits[10] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + print_char4(digits[j]); + } +} + +void DebugDraw::print_value_uint(uint value, + const bool hex, + bool is_negative, + const bool is_unsigned) +{ + print_string_start(3u * 4u); + const uint blank_value = hex ? 0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = {blank_value, blank_value, prefix}; + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. 
*/ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + print_char4(digits[j]); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Display + * \{ */ + +void DebugDraw::display_lines() +{ + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { + return; + } + GPU_debug_group_begin("Lines"); + cpu_draw_buf_.push_update(); + + float4x4 persmat; + const DRWView *view = DRW_view_get_active(); + DRW_view_persmat_get(view, persmat.ptr(), false); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS); + + GPUBatch *batch = drw_cache_procedural_lines_get(); + GPUShader *shader = DRW_shader_debug_draw_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + GPU_shader_uniform_mat4(shader, "persmat", persmat.ptr()); + + if (gpu_draw_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_draw_buf_, 0); + GPU_storagebuf_unbind(gpu_draw_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_draw_buf_, 0); + GPU_storagebuf_unbind(cpu_draw_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_prints() +{ + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { + return; + } + 
GPU_debug_group_begin("Prints"); + cpu_print_buf_.push_update(); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_PROGRAM_POINT_SIZE); + + GPUBatch *batch = drw_cache_procedural_points_get(); + GPUShader *shader = DRW_shader_debug_print_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + + if (gpu_print_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_print_buf_, 0); + GPU_storagebuf_unbind(gpu_print_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_print_buf_, 0); + GPU_storagebuf_unbind(cpu_print_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_to_view() +{ + GPU_debug_group_begin("DebugDraw"); + + display_lines(); + /* Print 3D shapes before text to avoid overlaps. */ + display_prints(); + /* Init again so we don't draw the same thing twice. */ + init(); + + GPU_debug_group_end(); +} + +} // namespace blender::draw + +blender::draw::DebugDraw *DRW_debug_get() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return nullptr; + } + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API private + * \{ */ + +void drw_debug_draw() +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support() || DST.debug == nullptr) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->display_to_view(); +#endif +} + +/** + * NOTE: Init is once per draw manager cycle. + */ +void drw_debug_init() +{ + /* Module should not be used in release builds. */ + /* TODO(@fclem): Hide the functions declarations without using `ifdefs` everywhere. 
*/ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + if (DST.debug == nullptr) { + DST.debug = reinterpret_cast<DRWDebugModule *>(new blender::draw::DebugDraw()); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->init(); +#endif +} + +void drw_debug_module_free(DRWDebugModule *module) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + if (module != nullptr) { + delete reinterpret_cast<blender::draw::DebugDraw *>(module); + } +} + +GPUStorageBuf *drw_debug_gpu_draw_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_draw_buf_get(); +} + +GPUStorageBuf *drw_debug_gpu_print_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_print_buf_get(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API public + * \{ */ + +void DRW_debug_modelmat_reset() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_reset(); +} + +void DRW_debug_modelmat(const float modelmat[4][4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_set(modelmat); +#else + UNUSED_VARS(modelmat); +#endif +} + +void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_line(v1, v2, color); +} + +void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_polygon( + blender::Span<float3>((float3 *)v, vert_len), color); +} + +void 
DRW_debug_m4(const float m[4][4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix(m); +} + +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + blender::float4x4 m4 = m; + if (invert) { + m4 = m4.inverted(); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix_as_bbox(m4, color); +} + +void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_bbox(*bbox, color); +#else + UNUSED_VARS(bbox, color); +#endif +} + +void DRW_debug_sphere(const float center[3], float radius, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_sphere(center, radius, color); +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_debug.h b/source/blender/draw/intern/draw_debug.h index 333d734edb9..9a56a12242e 100644 --- a/source/blender/draw/intern/draw_debug.h +++ b/source/blender/draw/intern/draw_debug.h @@ -3,21 +3,38 @@ /** \file * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + * IMPORTANT: This is the legacy API for C. Use draw_debug.hh instead in new C++ code. */ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct DRWDebugModule DRWDebugModule; + struct BoundBox; void DRW_debug_modelmat_reset(void); void DRW_debug_modelmat(const float modelmat[4][4]); +/** + * IMPORTANT: For now there is a limit of DRW_DEBUG_DRAW_VERT_MAX that can be drawn + * using all the draw functions. 
+ */ void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]); void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]); /** * \note g_modelmat is still applied on top. */ void DRW_debug_m4(const float m[4][4]); -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], bool invert); +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]); void DRW_debug_bbox(const BoundBox *bbox, const float color[4]); void DRW_debug_sphere(const float center[3], float radius, const float color[4]); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/intern/draw_debug.hh b/source/blender/draw/intern/draw_debug.hh new file mode 100644 index 00000000000..c83936bf1af --- /dev/null +++ b/source/blender/draw/intern/draw_debug.hh @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes and log in the viewport. + * + * Both CPU and GPU implementation are supported and symmetrical (meaning GPU shader can use it + * too, see common_debug_print/draw_lib.glsl). + * + * NOTE: CPU logging will overlap GPU logging on screen as it is drawn after. + */ + +#pragma once + +#include "BLI_math_vec_types.hh" +#include "BLI_string_ref.hh" +#include "BLI_vector.hh" +#include "DNA_object_types.h" +#include "DRW_gpu_wrapper.hh" + +namespace blender::draw { + +/* Shortcuts to avoid boilerplate code and match shader API. */ +#define drw_debug_line(...) DRW_debug_get()->draw_line(__VA_ARGS__) +#define drw_debug_polygon(...) DRW_debug_get()->draw_polygon(__VA_ARGS__) +#define drw_debug_bbox(...) DRW_debug_get()->draw_bbox(__VA_ARGS__) +#define drw_debug_sphere(...) DRW_debug_get()->draw_sphere(__VA_ARGS__) +#define drw_debug_point(...) DRW_debug_get()->draw_point(__VA_ARGS__) +#define drw_debug_matrix(...) 
DRW_debug_get()->draw_matrix(__VA_ARGS__) +#define drw_debug_matrix_as_bbox(...) DRW_debug_get()->draw_matrix_as_bbox(__VA_ARGS__) +#define drw_print(...) DRW_debug_get()->print(__VA_ARGS__) +#define drw_print_hex(...) DRW_debug_get()->print_hex(__VA_ARGS__) +#define drw_print_binary(...) DRW_debug_get()->print_binary(__VA_ARGS__) +#define drw_print_no_endl(...) DRW_debug_get()->print_no_endl(__VA_ARGS__) + +/* Will log variable along with its name, like the shader version of print(). */ +#define drw_print_id(v_) DRW_debug_get()->print(#v_, "= ", v_) +#define drw_print_id_no_endl(v_) DRW_debug_get()->print_no_endl(#v_, "= ", v_) + +class DebugDraw { + private: + using DebugDrawBuf = StorageBuffer<DRWDebugDrawBuffer>; + using DebugPrintBuf = StorageBuffer<DRWDebugPrintBuffer>; + + /** Data buffers containing all verts or chars to draw. */ + DebugDrawBuf cpu_draw_buf_ = {"DebugDrawBuf-CPU"}; + DebugDrawBuf gpu_draw_buf_ = {"DebugDrawBuf-GPU"}; + DebugPrintBuf cpu_print_buf_ = {"DebugPrintBuf-CPU"}; + DebugPrintBuf gpu_print_buf_ = {"DebugPrintBuf-GPU"}; + /** True if the gpu buffer have been requested and may contain data to draw. */ + bool gpu_print_buf_used = false; + bool gpu_draw_buf_used = false; + /** Matrix applied to all points before drawing. Could be a stack if needed. */ + float4x4 model_mat_; + /** Precomputed shapes verts. */ + Vector<float3> sphere_verts_; + Vector<float3> point_verts_; + /** Cursor position for print functionality. */ + uint print_col_ = 0; + uint print_row_ = 0; + + public: + DebugDraw(); + ~DebugDraw(){}; + + /** + * Resets all buffers and reset model matrix state. + * Not to be called by user. + */ + void init(); + + /** + * Resets model matrix state to identity. + */ + void modelmat_reset(); + /** + * Sets model matrix transform to apply to any vertex passed to drawing functions. + */ + void modelmat_set(const float modelmat[4][4]); + + /** + * Drawing functions that will draw wire-frames with the given color. 
+ */ + void draw_line(float3 v1, float3 v2, float4 color = {1, 0, 0, 1}); + void draw_polygon(Span<float3> poly_verts, float4 color = {1, 0, 0, 1}); + void draw_bbox(const BoundBox &bbox, const float4 color = {1, 0, 0, 1}); + void draw_sphere(const float3 center, float radius, const float4 color = {1, 0, 0, 1}); + void draw_point(const float3 center, float radius = 0.01f, const float4 color = {1, 0, 0, 1}); + /** + * Draw a matrix transformation as 3 colored axes. + */ + void draw_matrix(const float4x4 m4); + /** + * Draw a matrix as a 2 units length bounding box, centered on origin. + */ + void draw_matrix_as_bbox(float4x4 mat, const float4 color = {1, 0, 0, 1}); + + /** + * Will draw all debug shapes and text cached up until now to the current view / frame-buffer. + * Draw buffers will be emptied and ready for new debug data. + */ + void display_to_view(); + + /** + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + */ + template<typename... Ts> void print(StringRefNull str, Ts... args) + { + print_no_endl(str, args...); + print_newline(); + } + template<typename T> void print(const T &value) + { + print_value(value); + print_newline(); + } + template<typename T> void print_hex(const T &value) + { + print_value_hex(value); + print_newline(); + } + template<typename T> void print_binary(const T &value) + { + print_value_binary(value); + print_newline(); + } + + /** + * Same as `print()` but does not finish the line. + */ + void print_no_endl(std::string arg) + { + print_string(arg); + } + void print_no_endl(StringRef arg) + { + print_string(arg); + } + void print_no_endl(StringRefNull arg) + { + print_string(arg); + } + void print_no_endl(char const *arg) + { + print_string(StringRefNull(arg)); + } + template<typename T> void print_no_endl(T arg) + { + print_value(arg); + } + template<typename T, typename... 
Ts> void print_no_endl(T arg, Ts... args) + { + print_no_endl(arg); + print_no_endl(args...); + } + + /** + * Not to be called by user. Should become private. + */ + GPUStorageBuf *gpu_draw_buf_get(); + GPUStorageBuf *gpu_print_buf_get(); + + private: + uint color_pack(float4 color); + DRWDebugVert vert_pack(float3 pos, uint color); + + void draw_line(float3 v1, float3 v2, uint color); + + void print_newline(); + void print_string_start(uint len); + void print_string(std::string str); + void print_char4(uint data); + void print_append_char(uint char1, uint &char4); + void print_append_digit(uint digit, uint &char4); + void print_append_space(uint &char4); + void print_value_binary(uint value); + void print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned); + + template<typename T> void print_value(const T &value); + template<typename T> void print_value_hex(const T &value); + template<typename T> void print_value_binary(const T &value); + + void display_lines(); + void display_prints(); +}; + +} // namespace blender::draw + +/** + * Ease of use function to get the debug module. + * TODO(fclem): Should be removed once DRWManager is no longer global. + * IMPORTANT: Can return nullptr if storage buffer is not supported. + */ +blender::draw::DebugDraw *DRW_debug_get(); diff --git a/source/blender/draw/intern/draw_defines.h b/source/blender/draw/intern/draw_defines.h new file mode 100644 index 00000000000..3df7e47cffb --- /dev/null +++ b/source/blender/draw/intern/draw_defines.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup draw + * + * List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid + * dragging larger headers into the createInfo pipeline which would cause problems. 
+ */ + +#pragma once + +#define DRW_VIEW_UBO_SLOT 0 + +#define DRW_RESOURCE_ID_SLOT 11 +#define DRW_OBJ_MAT_SLOT 10 +#define DRW_OBJ_INFOS_SLOT 9 +#define DRW_OBJ_ATTR_SLOT 8 + +#define DRW_DEBUG_PRINT_SLOT 15 +#define DRW_DEBUG_DRAW_SLOT 14 + +#define DRW_COMMAND_GROUP_SIZE 64 +#define DRW_FINALIZE_GROUP_SIZE 64 +/* Must be multiple of 32. Set to 32 for shader simplicity. */ +#define DRW_VISIBILITY_GROUP_SIZE 32 diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc index 0a3c16e0d71..ceee1c7cb48 100644 --- a/source/blender/draw/intern/draw_hair.cc +++ b/source/blender/draw/intern/draw_hair.cc @@ -22,33 +22,29 @@ #include "GPU_batch.h" #include "GPU_capabilities.h" #include "GPU_compute.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" #include "GPU_vertex_buffer.h" +#include "DRW_gpu_wrapper.hh" + #include "draw_hair_private.h" #include "draw_shader.h" - -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif +#include "draw_shader_shared.h" BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; @@ -60,11 +56,11 @@ static ParticleRefineCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; -static DRWPass *g_tf_pass; /* XXX can be a problem with multiple DRWManager in the 
future */ +static DRWPass *g_tf_pass; /* XXX can be a problem with multiple #DRWManager in the future */ +static blender::draw::UniformBuffer<CurvesInfos> *g_dummy_curves_info = nullptr; static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) { @@ -73,26 +69,35 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) void DRW_hair_init(void) { -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); -#else - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); + } + else { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert); - /* Create vbo immediately to bind to texture buffer. */ + /* Create VBO immediately to bind to texture buffer. 
*/ GPU_vertbuf_use(g_dummy_vbo); g_dummy_texture = GPU_texture_create_from_vertbuf("hair_dummy_attr", g_dummy_vbo); + + g_dummy_curves_info = MEM_new<blender::draw::UniformBuffer<CurvesInfos>>( + "g_dummy_curves_info"); + memset(g_dummy_curves_info->is_point_attribute, + 0, + sizeof(g_dummy_curves_info->is_point_attribute)); + g_dummy_curves_info->push_update(); } } @@ -135,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache if (final_points_len > 0) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + pr_call->vbo = cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + 
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -239,7 +247,7 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); - /* TODO: optimize this. Only bind the ones GPUMaterial needs. */ + /* TODO: optimize this. Only bind the ones #GPUMaterial needs. */ for (int i = 0; i < hair_cache->num_uv_layers; i++) { for (int n = 0; n < MAX_LAYER_NAME_CT && hair_cache->uv_layer_names[i][n][0] != '\0'; n++) { DRW_shgroup_uniform_texture(shgrp, hair_cache->uv_layer_names[i][n], hair_cache->uv_tex[i]); @@ -276,6 +284,8 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, if (hair_cache->length_tex) { DRW_shgroup_uniform_texture(shgrp, "l", hair_cache->length_tex); } + + DRW_shgroup_uniform_block(shgrp, "drw_curves", *g_dummy_curves_info); DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res); DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape); @@ -293,85 +303,122 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, void DRW_hair_update() { -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. 
+ * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + ParticleRefineCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. 
*/ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - ParticleRefineCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a frame-buffer is active to perform any rendering work, + * even if there is no output. 
*/ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } + + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Release temporary frame-buffer. 
*/ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_hair_free(void) { GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo); DRW_TEXTURE_FREE_SAFE(g_dummy_texture); + MEM_delete(g_dummy_curves_info); } diff --git a/source/blender/draw/intern/draw_hair_private.h b/source/blender/draw/intern/draw_hair_private.h index 5d84c8863f2..c7e9e1e22de 100644 --- a/source/blender/draw/intern/draw_hair_private.h +++ b/source/blender/draw/intern/draw_hair_private.h @@ -61,9 +61,9 @@ typedef struct ParticleHairCache { GPUTexture *uv_tex[MAX_MTFACE]; char uv_layer_names[MAX_MTFACE][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; - GPUVertBuf *proc_col_buf[MAX_MCOL]; - GPUTexture *col_tex[MAX_MCOL]; - char col_layer_names[MAX_MCOL][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; + GPUVertBuf **proc_col_buf; + GPUTexture **col_tex; + char (*col_layer_names)[MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; int num_uv_layers; int num_col_layers; diff --git a/source/blender/draw/intern/draw_handle.hh b/source/blender/draw/intern/draw_handle.hh new file mode 100644 index 00000000000..5f96bfa5dcd --- /dev/null +++ b/source/blender/draw/intern/draw_handle.hh @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * A unique identifier for each object component. + * It is used to access each component data such as matrices and object attributes. + * It is valid only for the current draw, it is not persistent. + * + * The most significant bit is used to encode if the object needs to invert the front face winding + * because of its object matrix handedness. This is handy because this means sorting inside + * #DrawGroup command will put all inverted commands last. + * + * Default value of 0 points toward an non-cull-able object with unit bounding box centered at + * the origin. 
+ */ + +#include "draw_shader_shared.h" + +struct Object; +struct DupliObject; + +namespace blender::draw { + +struct ResourceHandle { + uint raw; + + ResourceHandle() = default; + ResourceHandle(uint raw_) : raw(raw_){}; + ResourceHandle(uint index, bool inverted_handedness) + { + raw = index; + SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u); + } + + bool has_inverted_handedness() const + { + return (raw & 0x80000000u) != 0; + } + + uint resource_index() const + { + return (raw & 0x7FFFFFFFu); + } +}; + +/* TODO(fclem): Move to somewhere more appropriated after cleaning up the header dependencies. */ +struct ObjectRef { + Object *object; + /** Dupli object that corresponds to the current object. */ + DupliObject *dupli_object; + /** Object that created the dupli-list the current object is part of. */ + Object *dupli_parent; +}; + +}; // namespace blender::draw diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c index 0e4e67f3320..ac2aea4524d 100644 --- a/source/blender/draw/intern/draw_instance_data.c +++ b/source/blender/draw/intern/draw_instance_data.c @@ -27,6 +27,7 @@ #include "BKE_duplilist.h" #include "RNA_access.h" +#include "RNA_path.h" #include "BLI_bitmap.h" #include "BLI_memblock.h" @@ -563,7 +564,8 @@ typedef struct DRWUniformAttrBuf { struct DRWUniformAttrBuf *next_empty; } DRWUniformAttrBuf; -static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, GPUUniformAttrList *key) +static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, + const GPUUniformAttrList *key) { void **pkey, **pval; @@ -641,23 +643,16 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, { copy_v4_fl(r_data, 0); - char idprop_name[(sizeof(attr->name) * 2) + 4]; - { - char attr_name_esc[sizeof(attr->name) * 2]; - BLI_str_escape(attr_name_esc, attr->name, sizeof(attr_name_esc)); - SNPRINTF(idprop_name, "[\"%s\"]", attr_name_esc); - } - /* If requesting instance data, check the 
parent particle system and object. */ if (attr->use_dupli) { if (dupli_source && dupli_source->particle_system) { ParticleSettings *settings = dupli_source->particle_system->part; - if (drw_uniform_property_lookup((ID *)settings, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)settings, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)settings, attr->name, r_data)) { return; } } - if (drw_uniform_property_lookup((ID *)dupli_parent, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)dupli_parent, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)dupli_parent, attr->name, r_data)) { return; } @@ -665,9 +660,9 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, /* Check the object and mesh. */ if (ob) { - if (drw_uniform_property_lookup((ID *)ob, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)ob, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob, attr->name, r_data) || - drw_uniform_property_lookup((ID *)ob->data, idprop_name, r_data) || + drw_uniform_property_lookup((ID *)ob->data, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob->data, attr->name, r_data)) { return; } @@ -675,7 +670,7 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, } void drw_uniform_attrs_pool_update(GHash *table, - GPUUniformAttrList *key, + const GPUUniformAttrList *key, DRWResourceHandle *handle, Object *ob, Object *dupli_parent, @@ -696,7 +691,8 @@ void drw_uniform_attrs_pool_update(GHash *table, } } -DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, struct GPUUniformAttrList *key) +DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, + const struct GPUUniformAttrList *key) { DRWUniformAttrBuf *buffer = BLI_ghash_lookup(table, key); return buffer ? 
&buffer->ubos : NULL; diff --git a/source/blender/draw/intern/draw_instance_data.h b/source/blender/draw/intern/draw_instance_data.h index 4b5cf63bb3b..9053544d98a 100644 --- a/source/blender/draw/intern/draw_instance_data.h +++ b/source/blender/draw/intern/draw_instance_data.h @@ -106,4 +106,4 @@ struct GHash *DRW_uniform_attrs_pool_new(void); void DRW_uniform_attrs_pool_flush_all(struct GHash *table); void DRW_uniform_attrs_pool_clear_all(struct GHash *table); struct DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(struct GHash *table, - struct GPUUniformAttrList *key); + const struct GPUUniformAttrList *key); diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index bc9d0a3d02a..9761aa8c789 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -43,6 +43,7 @@ #include "DNA_camera_types.h" #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" +#include "DNA_userdef_types.h" #include "DNA_world_types.h" #include "ED_gpencil.h" @@ -84,6 +85,7 @@ #include "draw_cache_impl.h" #include "engines/basic/basic_engine.h" +#include "engines/compositor/compositor_engine.h" #include "engines/eevee/eevee_engine.h" #include "engines/eevee_next/eevee_engine.h" #include "engines/external/external_engine.h" @@ -179,7 +181,7 @@ static void drw_task_graph_deinit(void) bool DRW_object_is_renderable(const Object *ob) { - BLI_assert((ob->base_flag & BASE_VISIBLE_DEPSGRAPH) != 0); + BLI_assert((ob->base_flag & BASE_ENABLED_AND_MAYBE_VISIBLE_IN_VIEWPORT) != 0); if (ob->type == OB_MESH) { if ((ob == DST.draw_ctx.object_edit) || DRW_object_is_in_edit_mode(ob)) { @@ -212,17 +214,6 @@ int DRW_object_visibility_in_active_context(const Object *ob) return BKE_object_visibility(ob, mode); } -bool DRW_object_is_flat_normal(const Object *ob) -{ - if (ob->type == OB_MESH) { - const Mesh *me = ob->data; - if (me->mpoly && me->mpoly[0].flag & ME_SMOOTH) { - return false; - } - } - return true; 
-} - bool DRW_object_use_hide_faces(const struct Object *ob) { if (ob->type == OB_MESH) { @@ -235,7 +226,7 @@ bool DRW_object_use_hide_faces(const struct Object *ob) return (me->editflag & ME_EDIT_PAINT_FACE_SEL) != 0; case OB_MODE_VERTEX_PAINT: case OB_MODE_WEIGHT_PAINT: - return (me->editflag & (ME_EDIT_PAINT_FACE_SEL | ME_EDIT_PAINT_VERT_SEL)) != 0; + return true; } } @@ -1010,6 +1001,8 @@ static void drw_engines_init(void) static void drw_engines_cache_init(void) { + DRW_manager_begin_sync(); + DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) { if (data->text_draw_cache) { DRW_text_cache_destroy(data->text_draw_cache); @@ -1081,6 +1074,8 @@ static void drw_engines_cache_finish(void) engine->cache_finish(data); } } + + DRW_manager_end_sync(); } static void drw_engines_draw_scene(void) @@ -1225,6 +1220,31 @@ static void drw_engines_enable_editors(void) } } +static bool is_compositor_enabled(void) +{ + if (!U.experimental.use_realtime_compositor) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.flag & V3D_SHADING_COMPOSITOR)) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.type >= OB_MATERIAL)) { + return false; + } + + if (!DST.draw_ctx.scene->use_nodes) { + return false; + } + + if (!DST.draw_ctx.scene->nodetree) { + return false; + } + + return true; +} + static void drw_engines_enable(ViewLayer *UNUSED(view_layer), RenderEngineType *engine_type, bool gpencil_engine_needed) @@ -1237,6 +1257,11 @@ static void drw_engines_enable(ViewLayer *UNUSED(view_layer), if (gpencil_engine_needed && ((drawtype >= OB_SOLID) || !use_xray)) { use_drw_engine(&draw_engine_gpencil_type); } + + if (is_compositor_enabled()) { + use_drw_engine(&draw_engine_compositor_type); + } + drw_engines_enable_overlays(); #ifdef WITH_DRAW_DEBUG @@ -1299,13 +1324,14 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) /* Reset before using it. 
*/ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, .object_mode = OB_MODE_OBJECT, @@ -1323,11 +1349,7 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1356,13 +1378,14 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, /* Reset before using it. */ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -1379,11 +1402,7 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1608,11 +1627,11 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, GPUViewport *viewport, const bContext *evil_C) { - Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); RegionView3D *rv3d = region->regiondata; + 
BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, @@ -1620,7 +1639,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, @@ -2127,12 +2146,13 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, .space_data = CTX_wm_space_data(evil_C), @@ -2333,7 +2353,9 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); RenderEngineType *engine_type = ED_view3d_engine_type(scene, v3d->shading.type); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); - Object *obact = OBACT(view_layer); + + BKE_view_layer_synced_ensure(scene, view_layer); + Object *obact = BKE_view_layer_active_object_get(view_layer); Object *obedit = use_obedit_skip ? 
NULL : OBEDIT_FROM_OBACT(obact); #ifndef USE_GPU_SELECT UNUSED_VARS(scene, view_layer, v3d, region, rect); @@ -2442,7 +2464,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, drw_engines_world_update(scene); if (use_obedit) { - FOREACH_OBJECT_IN_MODE_BEGIN (view_layer, v3d, object_type, object_mode, ob_iter) { + FOREACH_OBJECT_IN_MODE_BEGIN (scene, view_layer, v3d, object_type, object_mode, ob_iter) { drw_engines_cache_populate(ob_iter); } FOREACH_OBJECT_IN_MODE_END; @@ -2463,7 +2485,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, } if (use_pose_exception && (ob->mode & OB_MODE_POSE)) { - if ((ob->base_flag & BASE_VISIBLE_VIEWLAYER) == 0) { + if ((ob->base_flag & BASE_ENABLED_AND_VISIBLE_IN_DEFAULT_VIEWPORT) == 0) { continue; } } @@ -2564,13 +2586,14 @@ static void drw_draw_depth_loop_impl(struct Depsgraph *depsgraph, DST.options.is_depth = true; /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -2683,7 +2706,7 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons GPUViewport *viewport = WM_draw_region_get_viewport(region); if (!viewport) { /* Selection engine requires a viewport. - * TODO(germano): This should be done internally in the engine. */ + * TODO(@germano): This should be done internally in the engine. 
*/ sel_ctx->is_dirty = true; sel_ctx->objects_drawn_len = 0; sel_ctx->index_drawn_len = 1; @@ -2697,13 +2720,14 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons drw_state_prepare_clean_for_draw(&DST); /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = region->regiondata, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, }; drw_task_graph_init(); @@ -2959,6 +2983,7 @@ void DRW_engines_register(void) DRW_engine_register(&draw_engine_overlay_type); DRW_engine_register(&draw_engine_select_type); DRW_engine_register(&draw_engine_basic_type); + DRW_engine_register(&draw_engine_compositor_type); #ifdef WITH_DRAW_DEBUG DRW_engine_register(&draw_engine_debug_select_type); #endif @@ -2968,9 +2993,6 @@ void DRW_engines_register(void) /* setup callbacks */ { - BKE_mball_batch_cache_dirty_tag_cb = DRW_mball_batch_cache_dirty_tag; - BKE_mball_batch_cache_free_cb = DRW_mball_batch_cache_free; - BKE_curve_batch_cache_dirty_tag_cb = DRW_curve_batch_cache_dirty_tag; BKE_curve_batch_cache_free_cb = DRW_curve_batch_cache_free; @@ -3039,6 +3061,9 @@ void DRW_engines_free(void) DRW_stats_free(); DRW_globals_free(); + drw_debug_module_free(DST.debug); + DST.debug = NULL; + DRW_UBO_FREE_SAFE(G_draw.block_ubo); DRW_UBO_FREE_SAFE(G_draw.view_ubo); DRW_TEXTURE_FREE_SAFE(G_draw.ramp); diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc new file mode 100644 index 00000000000..169d86b2ea1 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.cc @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +/** \file + * \ingroup draw + */ + +#include "BKE_global.h" +#include "GPU_compute.h" + +#include "draw_debug.hh" +#include "draw_defines.h" +#include "draw_manager.h" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" + +namespace blender::draw { + +Manager::~Manager() +{ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } +} + +void Manager::begin_sync() +{ + /* TODO: This means the reference is kept until further redraw or manager tear-down. Instead, + * they should be released after each draw loop. But for now, mimics old DRW behavior. */ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } + + acquired_textures.clear(); + +#ifdef DEBUG + /* Detect uninitialized data. */ + memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data())); + memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data())); + memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data())); +#endif + resource_len_ = 0; + attribute_len_ = 0; + /* TODO(fclem): Resize buffers if too big, but with an hysteresis threshold. */ + + object_active = DST.draw_ctx.obact; + + /* Init the 0 resource. */ + resource_handle(float4x4::identity()); +} + +void Manager::end_sync() +{ + GPU_debug_group_begin("Manager.end_sync"); + + matrix_buf.push_update(); + bounds_buf.push_update(); + infos_buf.push_update(); + attributes_buf.push_update(); + attributes_buf_legacy.push_update(); + + /* Useful for debugging the following resource finalize. But will trigger the drawing of the GPU + * debug draw/print buffers for every frame. Not nice for performance. */ + // debug_bind(); + + /* Dispatch compute to finalize the resources on GPU. Save a bit of CPU time. 
*/ + uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE); + GPUShader *shader = DRW_shader_draw_resource_finalize_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len_); + GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf")); + GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf")); + GPU_compute_dispatch(shader, thread_groups, 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + GPU_debug_group_end(); +} + +void Manager::debug_bind() +{ +#ifdef DEBUG + if (DST.debug == nullptr) { + return; + } + GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT); + GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); +# endif +#endif +} + +void Manager::resource_bind() +{ + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + GPU_storagebuf_bind(attributes_buf, DRW_OBJ_ATTR_SLOT); + /* 2 is the hardcoded location of the uniform attr UBO. */ + /* TODO(@fclem): Remove this workaround. 
*/ + GPU_uniformbuf_bind(attributes_buf_legacy, 2); +} + +void Manager::submit(PassSimple &pass, View &view) +{ + view.bind(); + + debug_bind(); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassMain &pass, View &view) +{ + view.bind(); + + debug_bind(); + + bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d && + (DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0); + + view.compute_visibility(bounds_buf, resource_len_, freeze_culling); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassSortable &pass, View &view) +{ + pass.sort(); + + this->submit(static_cast<PassMain &>(pass), view); +} + +void Manager::submit(PassSimple &pass) +{ + debug_bind(); + + command::RecordingState state; + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view) +{ + submit(pass, view); + + pass.draw_commands_buf_.resource_id_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + /* There is no visibility data for PassSimple. 
*/ + output.visibility = {(uint *)view.visibility_buf_.data(), 0}; + return output; +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view) +{ + submit(pass, view); + + GPU_finish(); + + pass.draw_commands_buf_.resource_id_buf_.read(); + view.visibility_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)}; + return output; +} + +Manager::DataDebugOutput Manager::data_debug() +{ + matrix_buf.read(); + bounds_buf.read(); + infos_buf.read(); + + Manager::DataDebugOutput output; + output.matrices = {matrix_buf.data(), resource_len_}; + output.bounds = {bounds_buf.data(), resource_len_}; + output.infos = {infos_buf.data(), resource_len_}; + return output; +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 6d384c599d8..4f71e665390 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -188,6 +188,7 @@ typedef enum { DRW_CMD_DRAW_INSTANCE = 2, DRW_CMD_DRAW_INSTANCE_RANGE = 3, DRW_CMD_DRAW_PROCEDURAL = 4, + DRW_CMD_DRAW_INDIRECT = 5, /* Compute Commands. 
*/ DRW_CMD_COMPUTE = 8, @@ -203,7 +204,7 @@ typedef enum { /* Needs to fit in 4bits */ } eDRWCommandType; -#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL +#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_INDIRECT typedef struct DRWCommandDraw { GPUBatch *batch; @@ -232,6 +233,12 @@ typedef struct DRWCommandDrawInstanceRange { uint inst_count; } DRWCommandDrawInstanceRange; +typedef struct DRWCommandDrawIndirect { + GPUBatch *batch; + DRWResourceHandle handle; + GPUStorageBuf *indirect_buf; +} DRWCommandDrawIndirect; + typedef struct DRWCommandCompute { int groups_x_len; int groups_y_len; @@ -286,6 +293,7 @@ typedef union DRWCommand { DRWCommandDrawInstance instance; DRWCommandDrawInstanceRange instance_range; DRWCommandDrawProcedural procedural; + DRWCommandDrawIndirect draw_indirect; DRWCommandCompute compute; DRWCommandComputeRef compute_ref; DRWCommandComputeIndirect compute_indirect; @@ -369,7 +377,7 @@ struct DRWUniform { /* DRW_UNIFORM_INT_COPY */ int ivalue[4]; /* DRW_UNIFORM_BLOCK_OBATTRS */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; int location; /* Uniform location or binding point for textures and UBO's. */ uint8_t type; /* #DRWUniformType */ @@ -395,7 +403,7 @@ struct DRWShadingGroup { DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ /* Set of uniform attributes used by this shader. */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; /* This struct is used after cache populate if using the Z sorting. * It will not conflict with the above struct. 
*/ @@ -493,20 +501,6 @@ typedef struct DRWCommandSmallChunk { BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); #endif -/* ------------- DRAW DEBUG ------------ */ - -typedef struct DRWDebugLine { - struct DRWDebugLine *next; /* linked list */ - float pos[2][3]; - float color[4]; -} DRWDebugLine; - -typedef struct DRWDebugSphere { - struct DRWDebugSphere *next; /* linked list */ - float mat[4][4]; - float color[4]; -} DRWDebugSphere; - /* ------------- Memory Pools ------------ */ /* Contains memory pools information */ @@ -533,10 +527,12 @@ typedef struct DRWData { void *volume_grids_ubos; /* VolumeUniformBufPool */ /** List of smoke textures to free after drawing. */ ListBase smoke_textures; - /** Texture pool to reuse temp texture across engines. */ - /* TODO(@fclem): The pool could be shared even between view-ports. */ + /** + * Texture pool to reuse temp texture across engines. + * TODO(@fclem): The pool could be shared even between view-ports. + */ struct DRWTexturePool *texture_pool; - /** Per stereo view data. Contains engine data and default framebuffers. */ + /** Per stereo view data. Contains engine data and default frame-buffers. */ struct DRWViewData *view_data[2]; /** Per draw-call curves object data. */ struct CurvesUniformBufPool *curves_ubos; @@ -646,11 +642,7 @@ typedef struct DRWManager { GPUDrawList *draw_list; - struct { - /* TODO(@fclem): optimize: use chunks. */ - DRWDebugLine *lines; - DRWDebugSphere *spheres; - } debug; + DRWDebugModule *debug; } DRWManager; extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. 
*/ @@ -665,6 +657,9 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); +void drw_debug_module_free(DRWDebugModule *module); +GPUStorageBuf *drw_debug_gpu_draw_buf_get(void); +GPUStorageBuf *drw_debug_gpu_print_buf_get(void); eDRWCommandType command_type_get(const uint64_t *command_type_bits, int index); @@ -683,9 +678,10 @@ void drw_resource_buffer_finish(DRWData *vmempool); GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); GPUBatch *drw_cache_procedural_triangles_get(void); +GPUBatch *drw_cache_procedural_triangle_strips_get(void); void drw_uniform_attrs_pool_update(struct GHash *table, - struct GPUUniformAttrList *key, + const struct GPUUniformAttrList *key, DRWResourceHandle *handle, struct Object *ob, struct Object *dupli_parent, @@ -698,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_ void drw_engine_data_cache_release(GPUViewport *viewport); void drw_engine_data_free(GPUViewport *viewport); +void DRW_manager_begin_sync(void); +void DRW_manager_end_sync(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh new file mode 100644 index 00000000000..fbd3d28d3f4 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.hh @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * `draw::Manager` is the interface between scene data and viewport engines. + * + * It holds per component data (`ObjectInfo`, `ObjectMatrices`, ...) indexed per `ResourceHandle`. + * + * \note It is currently work in progress and should replace the old global draw manager. 
+ */ + +#include "BLI_listbase_wrapper.hh" +#include "BLI_sys_types.h" +#include "GPU_material.h" + +#include "draw_resource.hh" +#include "draw_view.hh" + +#include <string> + +namespace blender::draw { + +/* Forward declarations. */ + +namespace detail { +template<typename T> class Pass; +} // namespace detail + +namespace command { +class DrawCommandBuf; +class DrawMultiBuf; +} // namespace command + +using PassSimple = detail::Pass<command::DrawCommandBuf>; +using PassMain = detail::Pass<command::DrawMultiBuf>; +class PassSortable; + +class Manager { + using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>; + using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; + using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>; + using ObjectAttributeBuf = StorageArrayBuffer<ObjectAttribute, 128>; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * `DRW_RESOURCE_CHUNK_LEN = 512`. + */ + using ObjectAttributeLegacyBuf = UniformArrayBuffer<float4, 8 * 512>; + + public: + struct SubmitDebugOutput { + /** Indexed by resource id. */ + Span<uint32_t> visibility; + /** Indexed by drawn instance. */ + Span<uint32_t> resource_id; + }; + + struct DataDebugOutput { + /** Indexed by resource id. */ + Span<ObjectMatrices> matrices; + /** Indexed by resource id. */ + Span<ObjectBounds> bounds; + /** Indexed by resource id. */ + Span<ObjectInfos> infos; + }; + + /** + * Buffers containing all object data. Referenced by resource index. + * Exposed as public members for shader access after sync. + */ + ObjectMatricesBuf matrix_buf; + ObjectBoundsBuf bounds_buf; + ObjectInfosBuf infos_buf; + + /** + * Object Attributes are reference by indirection data inside ObjectInfos. + * This is because attribute list is arbitrary. + */ + ObjectAttributeBuf attributes_buf; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * Only here to satisfy bindings. 
+ */ + ObjectAttributeLegacyBuf attributes_buf_legacy; + + /** + * List of textures coming from Image data-blocks. + * They need to be reference-counted in order to avoid being freed in another thread. + */ + Vector<GPUTexture *> acquired_textures; + + private: + /** Number of resource handle recorded. */ + uint resource_len_ = 0; + /** Number of object attribute recorded. */ + uint attribute_len_ = 0; + + Object *object_active = nullptr; + + public: + Manager(){}; + ~Manager(); + + /** + * Create a new resource handle for the given object. Can be called multiple time with the + * same object **successively** without duplicating the data. + */ + ResourceHandle resource_handle(const ObjectRef ref); + /** + * Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled + * and there won't be any associated object info / bounds. Assumes correct handedness / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix); + /** + * Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will + * be culled bute there won't be any associated object info / bounds. Assumes correct handedness + * / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent); + + /** + * Populate additional per resource data on demand. + */ + void extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials); + + /** + * Submit a pass for drawing. All resource reference will be dereferenced and commands will be + * sent to GPU. + */ + void submit(PassSimple &pass, View &view); + void submit(PassMain &pass, View &view); + void submit(PassSortable &pass, View &view); + /** + * Variant without any view. Must not contain any shader using `draw_view` create info. + */ + void submit(PassSimple &pass); + + /** + * Submit a pass for drawing but read back all data buffers for inspection. 
+ */ + SubmitDebugOutput submit_debug(PassSimple &pass, View &view); + SubmitDebugOutput submit_debug(PassMain &pass, View &view); + + /** + * Check data buffers of the draw manager. Only to be used after end_sync(). + */ + DataDebugOutput data_debug(); + + /** + * Will acquire the texture using ref counting and release it after drawing. To be used for + * texture coming from blender Image. + */ + void acquire_texture(GPUTexture *texture) + { + GPU_texture_ref(texture); + acquired_textures.append(texture); + } + + /** TODO(fclem): The following should become private at some point. */ + void begin_sync(); + void end_sync(); + + void debug_bind(); + void resource_bind(); +}; + +inline ResourceHandle Manager::resource_handle(const ObjectRef ref) +{ + bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active; + matrix_buf.get_or_resize(resource_len_).sync(*ref.object); + bounds_buf.get_or_resize(resource_len_).sync(*ref.object); + infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object); + return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline void Manager::extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials) +{ + ObjectInfos &infos = 
infos_buf.get_or_resize(handle.resource_index()); + infos.object_attrs_offset = attribute_len_; + + /* Simple cache solution to avoid duplicates. */ + Vector<uint32_t, 4> hash_cache; + + for (const GPUMaterial *mat : materials) { + const GPUUniformAttrList *attr_list = GPU_material_uniform_attributes(mat); + if (attr_list == nullptr) { + continue; + } + + LISTBASE_FOREACH (const GPUUniformAttr *, attr, &attr_list->list) { + /** WATCH: Linear Search. Avoid duplicate attributes across materials. */ + if ((mat != materials.first()) && (hash_cache.first_index_of_try(attr->hash_code) != -1)) { + /* Attribute has already been added to the attribute buffer by another material. */ + continue; + } + hash_cache.append(attr->hash_code); + if (attributes_buf.get_or_resize(attribute_len_).sync(ref, *attr)) { + infos.object_attrs_len++; + attribute_len_++; + } + } + } +} + +} // namespace blender::draw + +/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the + * callbacks. */ +blender::draw::Manager *DRW_manager_get(); +blender::draw::ObjectRef DRW_object_ref_get(Object *object); diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index 188d9114cd7..c75049508f9 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -17,9 +17,14 @@ #include "BKE_pbvh.h" #include "BKE_volume.h" +/* For debug cursor position. */ +#include "WM_api.h" +#include "wm_window.h" + #include "DNA_curve_types.h" #include "DNA_mesh_types.h" #include "DNA_meta_types.h" +#include "DNA_screen_types.h" #include "BLI_alloca.h" #include "BLI_hash.h" @@ -39,6 +44,16 @@ #include "intern/gpu_codegen.h" +/** + * IMPORTANT: + * In order to be able to write to the same print buffer sequentially, we add a barrier to allow + * multiple shader calls writing to the same buffer. 
+ * However, this adds explicit synchronization events which might change the rest of the + * application behavior and hide some bugs. If you know you are using shader debug print in only + * one shader pass, you can comment this out to remove the aforementioned barrier. + */ +#define DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* -------------------------------------------------------------------- */ /** \name Uniform Buffer Object (DRW_uniformbuffer) * \{ */ @@ -878,6 +893,17 @@ static void drw_command_draw_procedural(DRWShadingGroup *shgroup, cmd->vert_count = vert_count; } +static void drw_command_draw_indirect(DRWShadingGroup *shgroup, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + DRWCommandDrawIndirect *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_INDIRECT); + cmd->batch = batch; + cmd->handle = handle; + cmd->indirect_buf = indirect_buf; +} + static void drw_command_set_select_id(DRWShadingGroup *shgroup, GPUVertBuf *buf, uint select_id) { /* Only one can be valid. 
*/ @@ -1005,6 +1031,7 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * drw_command_compute_indirect(shgroup, indirect_buf); } + void DRW_shgroup_barrier(DRWShadingGroup *shgroup, eGPUBarrier type) { BLI_assert(GPU_compute_shader_support()); @@ -1044,6 +1071,38 @@ void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *shgroup, Object *ob, drw_shgroup_call_procedural_add_ex(shgroup, geom, ob, tri_count * 3); } +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf) +{ + struct GPUBatch *geom = NULL; + switch (primitive_type) { + case GPU_PRIM_POINTS: + geom = drw_cache_procedural_points_get(); + break; + case GPU_PRIM_LINES: + geom = drw_cache_procedural_lines_get(); + break; + case GPU_PRIM_TRIS: + geom = drw_cache_procedural_triangles_get(); + break; + case GPU_PRIM_TRI_STRIP: + geom = drw_cache_procedural_triangle_strips_get(); + break; + default: + BLI_assert_msg(0, + "Unsupported primitive type in DRW_shgroup_call_procedural_indirect. Add new " + "one as needed."); + break; + } + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : NULL, ob); + drw_command_draw_indirect(shgroup, geom, handle, indirect_buf); +} + void DRW_shgroup_call_instances(DRWShadingGroup *shgroup, Object *ob, struct GPUBatch *geom, @@ -1129,16 +1188,15 @@ static void sculpt_draw_cb(DRWSculptCallbackData *scd, GPU_PBVH_Buffers *buffers DRW_shgroup_uniform_vec3( shgrp, "materialDiffuseColor", SCULPT_DEBUG_COLOR(scd->debug_node_nr++), 1); } + /* DRW_shgroup_call_no_cull reuses matrices calculations for all the drawcalls of this * object. 
*/ DRW_shgroup_call_no_cull(shgrp, geom, scd->ob); } } -static void sculpt_debug_cb(void *user_data, - const float bmin[3], - const float bmax[3], - PBVHNodeFlags flag) +static void sculpt_debug_cb( + PBVHNode *node, void *user_data, const float bmin[3], const float bmax[3], PBVHNodeFlags flag) { int *debug_node_nr = (int *)user_data; BoundBox bb; @@ -1153,7 +1211,10 @@ static void sculpt_debug_cb(void *user_data, } #else /* Color coded leaf bounds. */ if (flag & PBVH_Leaf) { - DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR((*debug_node_nr)++)); + int color = (*debug_node_nr)++; + color += BKE_pbvh_debug_draw_gen_get(node); + + DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR(color)); } #endif } @@ -1246,8 +1307,8 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd) DRW_debug_modelmat(scd->ob->obmat); BKE_pbvh_draw_debug_cb( pbvh, - (void (*)( - void *d, const float min[3], const float max[3], PBVHNodeFlags f))sculpt_debug_cb, + (void (*)(PBVHNode * n, void *d, const float min[3], const float max[3], PBVHNodeFlags f)) + sculpt_debug_cb, &debug_node_nr); } } @@ -1466,6 +1527,27 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) shgroup, view_ubo_location, DRW_UNIFORM_BLOCK, G_draw.view_ubo, 0, 0, 1); } +#ifdef DEBUG + int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + if (debug_print_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_print_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_print_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. 
*/ + DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE); +# endif + } + + int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + if (debug_draw_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); + /* NOTE(fclem): No barrier as ordering is not important. */ + } +#endif + /* Not supported. */ BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV) == -1); BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW) == -1); @@ -1556,7 +1638,7 @@ void DRW_shgroup_add_material_resources(DRWShadingGroup *grp, struct GPUMaterial DRW_shgroup_uniform_block(grp, GPU_UBO_BLOCK_NAME, ubo); } - GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); + const GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); if (uattrs != NULL) { int loc = GPU_shader_get_uniform_block_binding(grp->shader, GPU_ATTRIBUTE_UBO_BLOCK_NAME); drw_shgroup_uniform_create_ex(grp, loc, DRW_UNIFORM_BLOCK_OBATTRS, uattrs, 0, 0, 1); @@ -1942,6 +2024,13 @@ DRWView *DRW_view_create(const float viewmat[4][4], copy_v4_fl4(view->storage.viewcamtexcofac, 1.0f, 1.0f, 0.0f, 0.0f); + if (DST.draw_ctx.evil_C && DST.draw_ctx.region) { + int region_origin[2] = {DST.draw_ctx.region->winrct.xmin, DST.draw_ctx.region->winrct.ymin}; + struct wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C); + wm_cursor_position_get(win, &view->storage.mouse_pixel[0], &view->storage.mouse_pixel[1]); + sub_v2_v2v2_int(view->storage.mouse_pixel, view->storage.mouse_pixel, region_origin); + } + DRW_view_update(view, viewmat, winmat, culling_viewmat, culling_winmat); return view; @@ -2041,6 +2130,14 @@ void DRW_view_update(DRWView *view, draw_frustum_bound_sphere_calc( &view->frustum_corners, viewinv, winmat, wininv, &view->frustum_bsphere); + /* TODO(fclem): Deduplicate. 
*/ + for (int i = 0; i < 8; i++) { + copy_v3_v3(view->storage.frustum_corners[i], view->frustum_corners.vec[i]); + } + for (int i = 0; i < 6; i++) { + copy_v4_v4(view->storage.frustum_planes[i], view->frustum_planes[i]); + } + #ifdef DRW_DEBUG_CULLING if (G.debug_value != 0) { DRW_debug_sphere( diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index e7e0e0ce41f..0e39cc1d3b9 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -318,6 +318,7 @@ void DRW_state_reset(void) DRW_state_reset_ex(DRW_STATE_DEFAULT); GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); @@ -874,6 +875,25 @@ static void draw_call_single_do(DRWShadingGroup *shgroup, state->baseinst_loc); } +/* Not to be mistaken with draw_indirect_call which does batch many drawcalls together. This one + * only execute an indirect drawcall with user indirect buffer. */ +static void draw_call_indirect(DRWShadingGroup *shgroup, + DRWCommandsState *state, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + draw_call_batching_flush(shgroup, state); + draw_call_resource_bind(state, &handle); + + if (G.f & G_FLAG_PICKSEL) { + GPU_select_load_id(state->select_id); + } + + GPU_batch_set_shader(batch, shgroup->shader); + GPU_batch_draw_indirect(batch, indirect_buf, 0); +} + static void draw_call_batching_start(DRWCommandsState *state) { state->neg_scale = false; @@ -970,6 +990,7 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) /* Unbinding can be costly. Skip in normal condition. 
*/ if (G.debug & G_DEBUG_GPU) { GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); } @@ -996,12 +1017,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) while ((cmd = draw_command_iter_step(&iter, &cmd_type))) { switch (cmd_type) { + case DRW_CMD_DRAW_PROCEDURAL: case DRW_CMD_DRWSTATE: case DRW_CMD_STENCIL: draw_call_batching_flush(shgroup, &state); break; case DRW_CMD_DRAW: - case DRW_CMD_DRAW_PROCEDURAL: + case DRW_CMD_DRAW_INDIRECT: case DRW_CMD_DRAW_INSTANCE: if (draw_call_is_culled(&cmd->instance.handle, DST.view_active)) { continue; @@ -1055,6 +1077,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) 1, true); break; + case DRW_CMD_DRAW_INDIRECT: + draw_call_indirect(shgroup, + &state, + cmd->draw_indirect.batch, + cmd->draw_indirect.handle, + cmd->draw_indirect.indirect_buf); + break; case DRW_CMD_DRAW_INSTANCE: draw_call_single_do(shgroup, &state, diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 4bc3898c5e7..1ada99093c6 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -297,6 +297,18 @@ GPUShader *DRW_shader_create_with_lib_ex(const char *vert, return sh; } +GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name) +{ + char *comp_with_lib = DRW_shader_library_create_shader_string(lib, comp); + GPUShader *sh = GPU_shader_create_compute(comp_with_lib, NULL, defines, name); + MEM_SAFE_FREE(comp_with_lib); + + return sh; +} + GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh new file mode 100644 index 00000000000..e1a0a6652ac --- /dev/null +++ 
b/source/blender/draw/intern/draw_pass.hh @@ -0,0 +1,1005 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Passes record draw commands. Commands are executed only when a pass is submitted for execution. + * + * `PassMain`: + * Should be used on heavy load passes such as ones that may contain scene objects. Draw call + * submission is optimized for large number of draw calls. But has a significant overhead per + * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of + * commands. \note The draw call order inside a batch of multiple draw with the exact same state is + * not guaranteed and is not even deterministic. Use a #PassSimple or #PassSortable if ordering is + * needed. \note As of now, it is also quite limited in the type of draw command it can record + * (no custom vertex count, no custom first vertex). + * + * `PassSimple`: + * Does not have the overhead of #PassMain but does not have the culling and batching optimization. + * It should be used for passes that needs a few commands or that needs guaranteed draw call order. + * + * `Pass<T>::Sub`: + * A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub() + * and is auto managed. This mean it can be created, filled and thrown away. A #PassSub reference + * is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub are + * inserted inside the parent #Pass where the sub have been created during submission. + * + * `PassSortable`: + * This is a sort of `PassMain` augmented with a per sub-pass sorting value. They can't directly + * contain draw command, everything needs to be inside sub-passes. Sub-passes are automatically + * sorted before submission. + * + * \note A pass can be recorded once and resubmitted any number of time. 
This can be a good + * optimization for passes that are always the same for each frame. The only thing to be aware of + * is the life time of external resources. If a pass contains draw-calls with non default + * #ResourceHandle (not 0) or a reference to any non static resources + * (#GPUBatch, #PushConstant ref, #ResourceBind ref) it will have to be re-recorded + * if any of these reference becomes invalid. + */ + +#include "BKE_image.h" +#include "BLI_vector.hh" +#include "DRW_gpu_wrapper.hh" +#include "GPU_debug.h" +#include "GPU_material.h" + +#include "draw_command.hh" +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader_shared.h" +#include "draw_state.h" + +#include "intern/gpu_codegen.h" + +namespace blender::draw { + +using namespace blender::draw; +using namespace blender::draw::command; + +class Manager; + +/* -------------------------------------------------------------------- */ +/** \name Pass API + * \{ */ + +namespace detail { + +/** + * Special container that never moves allocated items and has fast indexing. + */ +template<typename T, + /** Numbers of element of type T to allocate together. */ + int64_t block_size = 16> +class SubPassVector { + private: + Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_; + + public: + void clear() + { + blocks_.clear(); + } + + int64_t append_and_get_index(T &&elem) + { + /* Do not go over the inline size so that existing members never move. */ + if (blocks_.is_empty() || blocks_.last()->size() == block_size) { + blocks_.append(std::make_unique<Vector<T, block_size>>()); + } + return blocks_.last()->append_and_get_index(std::move(elem)) + + (blocks_.size() - 1) * block_size; + } + + T &operator[](int64_t index) + { + return (*blocks_[index / block_size])[index % block_size]; + } + + const T &operator[](int64_t index) const + { + return (*blocks_[index / block_size])[index % block_size]; + } +}; + +/** + * Public API of a draw pass. 
+ */ +template< + /** Type of command buffer used to create the draw calls. */ + typename DrawCommandBufType> +class PassBase { + friend Manager; + + /** Will use texture own sampler state. */ + static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX; + + protected: + /** Highest level of the command stream. Split command stream in different command types. */ + Vector<command::Header, 0> headers_; + /** Commands referenced by headers (which contains their types). */ + Vector<command::Undetermined, 0> commands_; + /* Reference to draw commands buffer. Either own or from parent pass. */ + DrawCommandBufType &draw_commands_buf_; + /* Reference to sub-pass commands buffer. Either own or from parent pass. */ + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_; + /** Currently bound shader. Used for interface queries. */ + GPUShader *shader_; + + public: + const char *debug_name; + + PassBase(const char *name, + DrawCommandBufType &draw_command_buf, + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes, + GPUShader *shader = nullptr) + : draw_commands_buf_(draw_command_buf), + sub_passes_(sub_passes), + shader_(shader), + debug_name(name){}; + + /** + * Reset the pass command pool. + * \note Implemented in derived class. Not a virtual function to avoid indirection. Here only for + * API readability listing. + */ + void init(); + + /** + * Create a sub-pass inside this pass. + */ + PassBase<DrawCommandBufType> &sub(const char *name); + + /** + * Changes the fixed function pipeline state. + * Starts as DRW_STATE_NO_DRAW at the start of a Pass submission. + * SubPass inherit previous pass state. + * + * IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil() + * to ensure correct behavior of stencil aware draws. + */ + void state_set(DRWState state); + + /** + * Clear the current frame-buffer. 
+ */ + void clear_color(float4 color); + void clear_depth(float depth); + void clear_stencil(uint8_t stencil); + void clear_depth_stencil(float depth, uint8_t stencil); + void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil); + + /** + * Reminders: + * - (compare_mask & reference) is what is tested against (compare_mask & stencil_value) + * stencil_value being the value stored in the stencil buffer. + * - (write-mask & reference) is what gets written if the test condition is fulfilled. + */ + void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask); + + /** + * Bind a shader. Any following bind() or push_constant() call will use its interface. + */ + void shader_set(GPUShader *shader); + + /** + * Bind a material shader along with its associated resources. Any following bind() or + * push_constant() call will use its interface. + * IMPORTANT: Assumes material is compiled and can be used (no compilation error). + */ + void material_set(Manager &manager, GPUMaterial *material); + + /** + * Record a draw call. + * \note Setting the count or first to -1 will use the values from the batch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw(GPUBatch *batch, + uint instance_len = -1, + uint vertex_len = -1, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Shorter version for the common case. + * \note Implemented in derived class. Not a virtual function to avoid indirection. + */ + void draw(GPUBatch *batch, ResourceHandle handle); + + /** + * Record a procedural draw call. Geometry is **NOT** source from a GPUBatch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Indirect variants. 
+ * \note If needed, the resource id need to also be set accordingly in the DrawCommand. + */ + void draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + void draw_procedural_indirect(GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + + /** + * Record a compute dispatch call. + */ + void dispatch(int3 group_len); + void dispatch(int3 *group_len); + void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer); + + /** + * Record a barrier call to synchronize arbitrary load/store operation between draw calls. + */ + void barrier(eGPUBarrier type); + + /** + * Bind a shader resource. + * + * Reference versions are to be used when the resource might be resize / realloc or even change + * between the time it is referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note Variations using slot will not query a shader interface and can be used before + * binding a shader. 
+ */ + void bind_image(const char *name, GPUTexture *image); + void bind_image(const char *name, GPUTexture **image); + void bind_image(int slot, GPUTexture *image); + void bind_image(int slot, GPUTexture **image); + void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_ssbo(const char *name, GPUStorageBuf *buffer); + void bind_ssbo(const char *name, GPUStorageBuf **buffer); + void bind_ssbo(int slot, GPUStorageBuf *buffer); + void bind_ssbo(int slot, GPUStorageBuf **buffer); + void bind_ubo(const char *name, GPUUniformBuf *buffer); + void bind_ubo(const char *name, GPUUniformBuf **buffer); + void bind_ubo(int slot, GPUUniformBuf *buffer); + void bind_ubo(int slot, GPUUniformBuf **buffer); + + /** + * Update a shader constant. + * + * Reference versions are to be used when the resource might change between the time it is + * referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note bool reference version is expected to take bool1 reference which is aliased to int. 
+ */ + void push_constant(const char *name, const float &data); + void push_constant(const char *name, const float2 &data); + void push_constant(const char *name, const float3 &data); + void push_constant(const char *name, const float4 &data); + void push_constant(const char *name, const int &data); + void push_constant(const char *name, const int2 &data); + void push_constant(const char *name, const int3 &data); + void push_constant(const char *name, const int4 &data); + void push_constant(const char *name, const bool &data); + void push_constant(const char *name, const float4x4 &data); + void push_constant(const char *name, const float *data, int array_len = 1); + void push_constant(const char *name, const float2 *data, int array_len = 1); + void push_constant(const char *name, const float3 *data, int array_len = 1); + void push_constant(const char *name, const float4 *data, int array_len = 1); + void push_constant(const char *name, const int *data, int array_len = 1); + void push_constant(const char *name, const int2 *data, int array_len = 1); + void push_constant(const char *name, const int3 *data, int array_len = 1); + void push_constant(const char *name, const int4 *data, int array_len = 1); + void push_constant(const char *name, const float4x4 *data); + + /** + * Turn the pass into a string for inspection. + */ + std::string serialize(std::string line_prefix = "") const; + + friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass) + { + return stream << pass.serialize(); + } + + protected: + /** + * Internal Helpers + */ + + int push_constant_offset(const char *name); + + void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil); + + GPUBatch *procedural_batch_get(GPUPrimType primitive); + + /** + * Return a new command recorded with the given type. 
+ */ + command::Undetermined &create_command(command::Type type); + + void submit(command::RecordingState &state) const; +}; + +template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> { + public: + using Sub = detail::PassBase<DrawCommandBufType>; + + private: + /** Sub-passes referenced by headers. */ + SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_; + /** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */ + DrawCommandBufType draw_commands_buf_main_; + + public: + Pass(const char *name) + : detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){}; + + void init() + { + this->headers_.clear(); + this->commands_.clear(); + this->sub_passes_.clear(); + this->draw_commands_buf_.clear(); + } +}; // namespace blender::draw + +} // namespace detail + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Pass types + * \{ */ + +/** + * Normal pass type. No visibility or draw-call optimization. + */ +// using PassSimple = detail::Pass<DrawCommandBuf>; + +/** + * Main pass type. + * Optimized for many draw calls and sub-pass. + * + * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially + * high overhead due to batching and culling optimizations. + */ +// using PassMain = detail::Pass<DrawMultiBuf>; + +/** + * Special pass type for rendering transparent objects. + * The base level can only be composed of sub passes that will be ordered by a sorting value. + */ +class PassSortable : public PassMain { + friend Manager; + + private: + /** Sorting value associated with each sub pass. 
*/ + Vector<float> sorting_values_; + + bool sorted_ = false; + + public: + PassSortable(const char *name_) : PassMain(name_){}; + + void init() + { + sorting_values_.clear(); + sorted_ = false; + PassMain::init(); + } + + PassMain::Sub &sub(const char *name, float sorting_value) + { + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({Type::SubPass, static_cast<uint>(index)}); + sorting_values_.append(sorting_value); + return sub_passes_[index]; + } + + std::string serialize(std::string line_prefix = "") const + { + if (sorted_ == false) { + const_cast<PassSortable *>(this)->sort(); + } + return PassMain::serialize(line_prefix); + } + + protected: + void sort() + { + if (sorted_ == false) { + std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) { + BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass); + float a_val = sorting_values_[a.index]; + float b_val = sorting_values_[b.index]; + return a_val < b_val || (a_val == b_val && a.index < b.index); + }); + sorted_ = true; + } + } +}; + +/** \} */ + +namespace detail { + +/* -------------------------------------------------------------------- */ +/** \name PassBase Implementation + * \{ */ + +template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type) +{ + int64_t index = commands_.append_and_get_index({}); + headers_.append({type, static_cast<uint>(index)}); + return commands_[index]; +} + +template<class T> +inline void PassBase<T>::clear(eGPUFrameBufferBits planes, + float4 color, + float depth, + uint8_t stencil) +{ + create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color}; +} + +template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive) +{ + switch (primitive) { + case GPU_PRIM_POINTS: + return drw_cache_procedural_points_get(); + case GPU_PRIM_LINES: + return drw_cache_procedural_lines_get(); + case 
GPU_PRIM_TRIS: + return drw_cache_procedural_triangles_get(); + case GPU_PRIM_TRI_STRIP: + return drw_cache_procedural_triangle_strips_get(); + default: + /* Add new one as needed. */ + BLI_assert_unreachable(); + return nullptr; + } +} + +template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name) +{ + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({command::Type::SubPass, static_cast<uint>(index)}); + return sub_passes_[index]; +} + +template<class T> void PassBase<T>::submit(command::RecordingState &state) const +{ + GPU_debug_group_begin(debug_name); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].submit(state); + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + commands_[header.index].resource_bind.execute(); + break; + case command::Type::PushConstant: + commands_[header.index].push_constant.execute(state); + break; + case command::Type::Draw: + commands_[header.index].draw.execute(state); + break; + case command::Type::DrawMulti: + commands_[header.index].draw_multi.execute(state); + break; + case command::Type::DrawIndirect: + commands_[header.index].draw_indirect.execute(state); + break; + case command::Type::Dispatch: + commands_[header.index].dispatch.execute(state); + break; + case command::Type::DispatchIndirect: + commands_[header.index].dispatch_indirect.execute(state); + break; + case command::Type::Barrier: + commands_[header.index].barrier.execute(); + break; + case command::Type::Clear: + commands_[header.index].clear.execute(); + break; + case command::Type::StateSet: + commands_[header.index].state_set.execute(state); + break; + case command::Type::StencilSet: + commands_[header.index].stencil_set.execute(); + break; + } + } + + 
GPU_debug_group_end(); +} + +template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const +{ + std::stringstream ss; + ss << line_prefix << "." << debug_name << std::endl; + line_prefix += " "; + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + ss << sub_passes_[header.index].serialize(line_prefix); + break; + case Type::ShaderBind: + ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl; + break; + case Type::ResourceBind: + ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl; + break; + case Type::PushConstant: + ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl; + break; + case Type::Draw: + ss << line_prefix << commands_[header.index].draw.serialize() << std::endl; + break; + case Type::DrawMulti: + ss << commands_[header.index].draw_multi.serialize(line_prefix); + break; + case Type::DrawIndirect: + ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl; + break; + case Type::Dispatch: + ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl; + break; + case Type::DispatchIndirect: + ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl; + break; + case Type::Barrier: + ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl; + break; + case Type::Clear: + ss << line_prefix << commands_[header.index].clear.serialize() << std::endl; + break; + case Type::StateSet: + ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl; + break; + case Type::StencilSet: + ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl; + break; + } + } + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw calls + * \{ */ + +template<class T> +inline 
void PassBase<T>::draw( + GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) +{ + if (instance_len == 0 || vertex_len == 0) { + return; + } + BLI_assert(shader_); + draw_commands_buf_.append_draw( + headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle); +} + +template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle) +{ + this->draw(batch, -1, -1, -1, handle); +} + +template<class T> +inline void PassBase<T>::draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) +{ + this->draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect draw calls + * \{ */ + +template<class T> +inline void PassBase<T>::draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + BLI_assert(shader_); + create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle}; +} + +template<class T> +inline void PassBase<T>::draw_procedural_indirect( + GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + this->draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compute Dispatch Implementation + * \{ */ + +template<class T> inline void PassBase<T>::dispatch(int3 group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> inline void PassBase<T>::dispatch(int3 *group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> +inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer) +{ + BLI_assert(shader_); + 
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Clear Implementation + * \{ */ + +template<class T> inline void PassBase<T>::clear_color(float4 color) +{ + this->clear(GPU_COLOR_BIT, color, 0.0f, 0); +} + +template<class T> inline void PassBase<T>::clear_depth(float depth) +{ + this->clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0); +} + +template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil) +{ + this->clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil); +} + +template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil); +} + +template<class T> +inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Barrier Implementation + * \{ */ + +template<class T> inline void PassBase<T>::barrier(eGPUBarrier type) +{ + create_command(Type::Barrier).barrier = {type}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name State Implementation + * \{ */ + +template<class T> inline void PassBase<T>::state_set(DRWState state) +{ + create_command(Type::StateSet).state_set = {state}; +} + +template<class T> +inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask) +{ + create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask}; +} + +template<class T> inline void PassBase<T>::shader_set(GPUShader *shader) +{ + shader_ = shader; + create_command(Type::ShaderBind).shader_bind = {shader}; +} + +template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial 
*material) +{ + GPUPass *gpupass = GPU_material_get_pass(material); + shader_set(GPU_pass_shader_get(gpupass)); + + /* Bind all textures needed by the material. */ + ListBase textures = GPU_material_textures(material); + for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) { + if (tex->ima) { + /* Image */ + ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr; + if (tex->tiled_mapping_name[0]) { + GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr); + manager.acquire_texture(tiles); + bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state); + + GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr); + manager.acquire_texture(tile_map); + bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state); + } + else { + GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr); + manager.acquire_texture(texture); + bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state); + } + } + else if (tex->colorband) { + /* Color Ramp */ + bind_texture(tex->sampler_name, *tex->colorband); + } + } + + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material); + if (ubo != nullptr) { + bind_ubo(GPU_UBO_BLOCK_NAME, ubo); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resource bind Implementation + * \{ */ + +template<class T> inline int PassBase<T>::push_constant_offset(const char *name) +{ + return GPU_shader_get_uniform(shader_, name); +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + 
GPUTexture *texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + GPUTexture **texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer) +{ + + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf 
**buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Push Constant Implementation + * \{ */ + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void 
PassBase<T>::push_constant(const char *name, const int4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len) +{ + 
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data) +{ + /* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4. + * This assumes that all commands are always stored in flat array of memory. */ + Undetermined commands[3]; + + PushConstant &cmd = commands[0].push_constant; + cmd.location = push_constant_offset(name); + cmd.array_len = 1; + cmd.comp_len = 16; + cmd.type = PushConstant::Type::FloatValue; + /* Copy overrides the next 2 commands. We append them as Type::None to not evaluate them. */ + *reinterpret_cast<float4x4 *>(&cmd.float4_value) = data; + + create_command(Type::PushConstant) = commands[0]; + create_command(Type::None) = commands[1]; + create_command(Type::None) = commands[2]; +} + +/** \} */ + +} // namespace detail + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_resource.cc b/source/blender/draw/intern/draw_resource.cc new file mode 100644 index 00000000000..689df4edb31 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.cc @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "DNA_particle_types.h" +#include "RNA_access.h" +#include "RNA_path.h" +#include "RNA_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectAttributes + * \{ */ + +/** + * Extract object attribute from RNA property. + * Returns true if the attribute was correctly extracted. 
+ * This function mirrors lookup_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::id_property_lookup(ID *id, const char *name) +{ + PointerRNA ptr, id_ptr; + PropertyRNA *prop; + + if (id == nullptr) { + return false; + } + + RNA_id_pointer_create(id, &id_ptr); + + if (!RNA_path_resolve(&id_ptr, name, &ptr, &prop)) { + return false; + } + + if (prop == nullptr) { + return false; + } + + PropertyType type = RNA_property_type(prop); + int array_len = RNA_property_array_length(&ptr, prop); + + if (array_len == 0) { + float value; + + if (type == PROP_FLOAT) { + value = RNA_property_float_get(&ptr, prop); + } + else if (type == PROP_INT) { + value = RNA_property_int_get(&ptr, prop); + } + else { + return false; + } + + *reinterpret_cast<float4 *>(&data_x) = float4(value, value, value, 1.0f); + return true; + } + + if (type == PROP_FLOAT && array_len <= 4) { + *reinterpret_cast<float4 *>(&data_x) = float4(0.0f, 0.0f, 0.0f, 1.0f); + RNA_property_float_get_array(&ptr, prop, &data_x); + return true; + } + return false; +} + +/** + * Go through all possible source of the given object uniform attribute. + * Returns true if the attribute was correctly filled. + * This function mirrors lookup_instance_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr) +{ + hash_code = attr.hash_code; + + /* If requesting instance data, check the parent particle system and object. 
*/ + if (attr.use_dupli) { + if ((ref.dupli_object != nullptr) && (ref.dupli_object->particle_system != nullptr)) { + ParticleSettings *settings = ref.dupli_object->particle_system->part; + if (this->id_property_lookup((ID *)settings, attr.name_id_prop) || + this->id_property_lookup((ID *)settings, attr.name)) { + return true; + } + } + if (this->id_property_lookup((ID *)ref.dupli_parent, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.dupli_parent, attr.name)) { + return true; + } + } + + /* Check the object and mesh. */ + if (ref.object != nullptr) { + if (this->id_property_lookup((ID *)ref.object, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object, attr.name) || + this->id_property_lookup((ID *)ref.object->data, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object->data, attr.name)) { + return true; + } + } + return false; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh new file mode 100644 index 00000000000..2df38e32ed2 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.hh @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Component / Object level resources like object attributes, matrices, visibility etc... + * Each of them are reference by resource index (#ResourceHandle). 
+ */ + +#include "BKE_curve.h" +#include "BKE_duplilist.h" +#include "BKE_mesh.h" +#include "BKE_object.h" +#include "BKE_volume.h" +#include "BLI_hash.h" +#include "DNA_curve_types.h" +#include "DNA_layer_types.h" +#include "DNA_meta_types.h" +#include "DNA_object_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectMatrices + * \{ */ + +inline void ObjectMatrices::sync(const Object &object) +{ + model = object.obmat; + model_inverse = object.imat; +} + +inline void ObjectMatrices::sync(const float4x4 &model_matrix) +{ + model = model_matrix; + model_inverse = model_matrix.inverted(); +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices) +{ + stream << "ObjectMatrices(" << std::endl; + stream << "model=" << matrices.model << ", " << std::endl; + stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectInfos + * \{ */ + +ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE) + +inline void ObjectInfos::sync() +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + flag = eObjectInfoFlag::OBJECT_NO_INFO; +} + +inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object) +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + color = ref.object->color; + index = ref.object->index; + SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET); + SET_FLAG_FROM_TEST( + flag, ref.object->transflag 
& OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE); + + if (ref.dupli_object == nullptr) { + /* TODO(fclem): this is rather costly to do at draw time. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * + (1.0f / (float)0xFFFFFFFF); + } + else { + random = ref.dupli_object->random_id * (1.0f / (float)0xFFFFFFFF); + } + /* Default values. Set if needed. */ + random = 0.0f; + + if (ref.object->data == nullptr) { + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + return; + } + + switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) { + case ID_VO: { + BoundBox &bbox = *BKE_volume_boundbox_get(ref.object); + orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */ + orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */ + break; + } + case ID_ME: { + BKE_mesh_texspace_get(static_cast<Mesh *>(ref.object->data), orco_add, orco_mul); + break; + } + case ID_CU_LEGACY: { + Curve &cu = *static_cast<Curve *>(ref.object->data); + BKE_curve_texspace_ensure(&cu); + orco_add = cu.loc; + orco_mul = cu.size; + break; + } + case ID_MB: { + MetaBall &mb = *static_cast<MetaBall *>(ref.object->data); + orco_add = mb.loc; + orco_mul = mb.size; + break; + } + default: + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + break; + } +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos) +{ + stream << "ObjectInfos("; + if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) { + stream << "skipped)" << std::endl; + return stream; + } + stream << "orco_add=" << infos.orco_add << ", "; + stream << "orco_mul=" << infos.orco_mul << ", "; + stream << "color=" << infos.color << ", "; + stream << "index=" << infos.index << ", "; + stream << "random=" << infos.random << ", "; + stream << "flag=" << infos.flag << ")" << std::endl; + return stream; +} + +/** \} */ + +/* 
-------------------------------------------------------------------- */ +/** \name ObjectBounds + * \{ */ + +inline void ObjectBounds::sync() +{ + bounding_sphere.w = -1.0f; /* Disable test. */ +} + +inline void ObjectBounds::sync(Object &ob) +{ + const BoundBox *bbox = BKE_object_boundbox_get(&ob); + if (bbox == nullptr) { + bounding_sphere.w = -1.0f; /* Disable test. */ + return; + } + *reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0]; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4]; + *reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3]; + *reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1]; + bounding_sphere.w = 0.0f; /* Enable test. */ +} + +inline void ObjectBounds::sync(const float3 ¢er, const float3 &size) +{ + *reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z); + bounding_sphere.w = 0.0; /* Enable test. 
*/ +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds) +{ + stream << "ObjectBounds("; + if (bounds.bounding_sphere.w == -1.0f) { + stream << "skipped)" << std::endl; + return stream; + } + stream << std::endl; + stream << ".bounding_corners[0]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl; + stream << ".bounding_corners[1]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl; + stream << ".bounding_corners[2]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl; + stream << ".bounding_corners[3]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl; + stream << ".sphere=(pos=" << float3(bounds.bounding_sphere) + << ", rad=" << bounds.bounding_sphere.w << std::endl; + stream << ")" << std::endl; + return stream; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_shader.cc b/source/blender/draw/intern/draw_shader.cc index 001ceb0ae8d..960348b4a94 100644 --- a/source/blender/draw/intern/draw_shader.cc +++ b/source/blender/draw/intern/draw_shader.cc @@ -17,13 +17,15 @@ #include "draw_shader.h" extern "C" char datatoc_common_hair_lib_glsl[]; - extern "C" char datatoc_common_hair_refine_vert_glsl[]; -extern "C" char datatoc_common_hair_refine_comp_glsl[]; -extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static struct { struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER]; + struct GPUShader *debug_print_display_sh; + struct GPUShader *debug_draw_display_sh; + struct GPUShader *draw_visibility_compute_sh; + struct GPUShader *draw_resource_finalize_sh; + struct GPUShader *draw_command_generate_sh; } e_data = {{nullptr}}; /* -------------------------------------------------------------------- */ @@ -109,6 +111,47 @@ GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineSh return e_data.hair_refine_sh[type]; } +GPUShader *DRW_shader_debug_print_display_get() +{ + if 
(e_data.debug_print_display_sh == nullptr) { + e_data.debug_print_display_sh = GPU_shader_create_from_info_name("draw_debug_print_display"); + } + return e_data.debug_print_display_sh; +} + +GPUShader *DRW_shader_debug_draw_display_get() +{ + if (e_data.debug_draw_display_sh == nullptr) { + e_data.debug_draw_display_sh = GPU_shader_create_from_info_name("draw_debug_draw_display"); + } + return e_data.debug_draw_display_sh; +} + +GPUShader *DRW_shader_draw_visibility_compute_get() +{ + if (e_data.draw_visibility_compute_sh == nullptr) { + e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name( + "draw_visibility_compute"); + } + return e_data.draw_visibility_compute_sh; +} + +GPUShader *DRW_shader_draw_resource_finalize_get() +{ + if (e_data.draw_resource_finalize_sh == nullptr) { + e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize"); + } + return e_data.draw_resource_finalize_sh; +} + +GPUShader *DRW_shader_draw_command_generate_get() +{ + if (e_data.draw_command_generate_sh == nullptr) { + e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate"); + } + return e_data.draw_command_generate_sh; +} + /** \} */ void DRW_shaders_free() @@ -116,4 +159,9 @@ void DRW_shaders_free() for (int i = 0; i < PART_REFINE_MAX_SHADER; i++) { DRW_SHADER_FREE_SAFE(e_data.hair_refine_sh[i]); } + DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh); + DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh); } diff --git a/source/blender/draw/intern/draw_shader.h b/source/blender/draw/intern/draw_shader.h index 63d755cc334..3b8c0425fa9 100644 --- a/source/blender/draw/intern/draw_shader.h +++ b/source/blender/draw/intern/draw_shader.h @@ -30,6 +30,12 @@ struct GPUShader *DRW_shader_hair_refine_get(ParticleRefineShader refinement, 
struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineShaderType sh_type); +struct GPUShader *DRW_shader_debug_print_display_get(void); +struct GPUShader *DRW_shader_debug_draw_display_get(void); +struct GPUShader *DRW_shader_draw_visibility_compute_get(void); +struct GPUShader *DRW_shader_draw_resource_finalize_get(void); +struct GPUShader *DRW_shader_draw_command_generate_get(void); + void DRW_shaders_free(void); #ifdef __cplusplus diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 94c0c53dab7..bedbedcf438 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -1,14 +1,42 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef GPU_SHADER +# pragma once + # include "GPU_shader.h" # include "GPU_shader_shared_utils.h" +# include "draw_defines.h" typedef struct ViewInfos ViewInfos; typedef struct ObjectMatrices ObjectMatrices; typedef struct ObjectInfos ObjectInfos; +typedef struct ObjectBounds ObjectBounds; typedef struct VolumeInfos VolumeInfos; typedef struct CurvesInfos CurvesInfos; +typedef struct ObjectAttribute ObjectAttribute; +typedef struct DrawCommand DrawCommand; +typedef struct DispatchCommand DispatchCommand; +typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; +typedef struct DRWDebugVert DRWDebugVert; +typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; + +# ifdef __cplusplus +/* C++ only forward declarations. */ +struct Object; +struct ID; +struct GPUUniformAttr; + +namespace blender::draw { + +struct ObjectRef; + +} // namespace blender::draw + +# else /* __cplusplus */ +/* C only forward declarations. */ +typedef enum eObjectInfoFlag eObjectInfoFlag; + +# endif #endif #define DRW_SHADER_SHARED_H @@ -40,9 +68,18 @@ struct ViewInfos { float2 viewport_size_inverse; /** Frustum culling data. */ - /** NOTE: vec3 arrays are padded to vec4. */ + /** \note vec3 array padded to vec4. 
*/ float4 frustum_corners[8]; float4 frustum_planes[6]; + float4 frustum_bound_sphere; + + /** For debugging purpose */ + /* Mouse pixel. */ + int2 mouse_pixel; + + /** True if facing needs to be inverted. */ + bool1 is_inverted; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) @@ -60,23 +97,89 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) # define CameraTexCoFactors drw_view.viewcamtexcofac #endif +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + struct ObjectMatrices { - float4x4 drw_modelMatrix; - float4x4 drw_modelMatrixInverse; + float4x4 model; + float4x4 model_inverse; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(const Object &object); + void sync(const float4x4 &model_matrix); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16) + +enum eObjectInfoFlag { + OBJECT_SELECTED = (1u << 0u), + OBJECT_FROM_DUPLI = (1u << 1u), + OBJECT_FROM_SET = (1u << 2u), + OBJECT_ACTIVE = (1u << 3u), + OBJECT_NEGATIVE_SCALE = (1u << 4u), + /* Avoid skipped info to change culling. */ + OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) struct ObjectInfos { - float4 drw_OrcoTexCoFactors[2]; - float4 drw_ObjectColor; - float4 drw_Infos; +#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER) + /* TODO Rename to struct member for glsl too. */ + float4 orco_mul_bias[2]; + float4 color; + float4 infos; +#else + /** Uploaded as center + size. Converted to mul+bias to local coord. 
*/ + float3 orco_add; + uint object_attrs_offset; + float3 orco_mul; + uint object_attrs_len; + + float4 color; + uint index; + uint _pad2; + float random; + eObjectInfoFlag flag; +#endif + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(const blender::draw::ObjectRef ref, bool is_active_object); +#endif }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) +BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16) + +struct ObjectBounds { + /** + * Uploaded as vertex (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order. + * Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling. + */ + float4 bounding_corners[4]; + /** Bounding sphere derived from the bounding corner. Computed on GPU. */ + float4 bounding_sphere; + /** Radius of the inscribed sphere derived from the bounding corner. Computed on GPU. */ +#define _inner_sphere_radius bounding_corners[3].w + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(Object &ob); + void sync(const float3 ¢er, const float3 &size); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Object attributes + * \{ */ struct VolumeInfos { - /* Object to grid-space. */ + /** Object to grid-space. */ float4x4 grids_xform[DRW_GRID_PER_VOLUME_MAX]; - /* NOTE: vec4 for alignment. Only float3 needed. */ + /** \note vec4 for alignment. Only float3 needed. */ float4 color_mul; float density_scale; float temperature_mul; @@ -86,13 +189,127 @@ struct VolumeInfos { BLI_STATIC_ASSERT_ALIGN(VolumeInfos, 16) struct CurvesInfos { - /* Per attribute scope, follows loading order. - * NOTE: uint as bool in GLSL is 4 bytes. */ - uint is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; - int _pad; + /** Per attribute scope, follows loading order. + * \note uint as bool in GLSL is 4 bytes. + * \note GLSL pad arrays of scalar to 16 bytes (std140). 
*/ + uint4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; }; BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) -#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors) -#define ObjectInfo (drw_infos[resource_id].drw_Infos) -#define ObjectColor (drw_infos[resource_id].drw_ObjectColor) +#pragma pack(push, 4) +struct ObjectAttribute { + /* Workaround the padding cost from alignment requirements. + * (see GL spec : 7.6.2.2 Standard Uniform Block Layout) */ + float data_x, data_y, data_z, data_w; + uint hash_code; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + bool sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr); + bool id_property_lookup(ID *id, const char *name); +#endif +}; +#pragma pack(pop) +/** \note we only align to 4 bytes and fetch data manually so make sure + * C++ compiler gives us the same size. */ +BLI_STATIC_ASSERT_ALIGN(ObjectAttribute, 20) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect commands structures. + * \{ */ + +struct DrawCommand { + /* TODO(fclem): Rename */ + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) + uint base_index; + /** \note base_index is i_first for non-indexed draw-calls. */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif + + uint instance_first_indexed; + + uint _pad0, _pad1, _pad2; +}; +BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) + +struct DispatchCommand { + uint num_groups_x; + uint num_groups_y; + uint num_groups_z; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * \{ */ + +/* Take the header (DrawCommand) into account. 
*/ +#define DRW_DEBUG_PRINT_MAX (8 * 1024) - 4 +/** \note Cannot be more than 255 (because of column encoding). */ +#define DRW_DEBUG_PRINT_WORD_WRAP_COLUMN 120u + +/* The debug print buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugPrintBuffer { + DrawCommand command; + /** Each character is encoded as 3 `uchar` with char_index, row and column position. */ + uint char_array[DRW_DEBUG_PRINT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Use number of char as vertex count. Equivalent to `DRWDebugPrintBuffer.command.v_count`. */ +#define drw_debug_print_cursor drw_debug_print_buf[0] +/* Reuse first instance as row index as we don't use instancing. Equivalent to + * `DRWDebugPrintBuffer.command.i_first`. */ +#define drw_debug_print_row_shared drw_debug_print_buf[3] +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(uint)`. + * This is needed because we bind the whole buffer as a `uint` array. */ +#define drw_debug_print_offset 8 + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + +struct DRWDebugVert { + /* This is a weird layout, but needed to be able to use DRWDebugVert as + * a DrawCommand and avoid alignment issues. See drw_debug_verts_buf[] definition. */ + uint pos0; + uint pos1; + uint pos2; + uint color; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16) + +/* Take the header (DrawCommand) into account. */ +#define DRW_DEBUG_DRAW_VERT_MAX (64 * 1024) - 1 + +/* The debug draw buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugDrawBuffer { + DrawCommand command; + DRWDebugVert verts[DRW_DEBUG_DRAW_VERT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Equivalent to `DRWDebugDrawBuffer.command.v_count`. 
*/ +#define drw_debug_draw_v_count drw_debug_verts_buf[0].pos0 +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(DRWDebugVert)`. + * This is needed because we bind the whole buffer as a `DRWDebugVert` array. */ +#define drw_debug_draw_offset 2 + +/** \} */ diff --git a/source/blender/draw/intern/draw_state.h b/source/blender/draw/intern/draw_state.h new file mode 100644 index 00000000000..bf1e63e0852 --- /dev/null +++ b/source/blender/draw/intern/draw_state.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file + * \ingroup draw + * + * Internal Pipeline State tracking. It is higher level than GPU state as everything fits a single + * enum. + */ + +/** + * DRWState is a bit-mask that stores the current render state and the desired render state. Based + * on the differences the minimum state changes can be invoked to setup the desired render state. + * + * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive + * therefore they aren't ordered as a bit mask. + */ +typedef enum { + /** To be used for compute passes. */ + DRW_STATE_NO_DRAW = 0, + /** Write mask */ + DRW_STATE_WRITE_DEPTH = (1 << 0), + DRW_STATE_WRITE_COLOR = (1 << 1), + /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ + DRW_STATE_WRITE_STENCIL = (1 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), + /** Depth test. These options are mutual exclusive and packed into 3 bits */ + DRW_STATE_DEPTH_ALWAYS = (1 << 4), + DRW_STATE_DEPTH_LESS = (2 << 4), + DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), + DRW_STATE_DEPTH_EQUAL = (4 << 4), + DRW_STATE_DEPTH_GREATER = (5 << 4), + DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), + /** Culling test */ + DRW_STATE_CULL_BACK = (1 << 7), + DRW_STATE_CULL_FRONT = (1 << 8), + /** Stencil test. 
These options are mutually exclusive and packed into 2 bits. */ + DRW_STATE_STENCIL_ALWAYS = (1 << 9), + DRW_STATE_STENCIL_EQUAL = (2 << 9), + DRW_STATE_STENCIL_NEQUAL = (3 << 9), + + /** Blend state. These options are mutual exclusive and packed into 4 bits */ + DRW_STATE_BLEND_ADD = (1 << 11), + /** Same as additive but let alpha accumulate without pre-multiply. */ + DRW_STATE_BLEND_ADD_FULL = (2 << 11), + /** Standard alpha blending. */ + DRW_STATE_BLEND_ALPHA = (3 << 11), + /** Use that if color is already pre-multiply by alpha. */ + DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), + DRW_STATE_BLEND_BACKGROUND = (5 << 11), + DRW_STATE_BLEND_OIT = (6 << 11), + DRW_STATE_BLEND_MUL = (7 << 11), + DRW_STATE_BLEND_SUB = (8 << 11), + /** Use dual source blending. WARNING: Only one color buffer allowed. */ + DRW_STATE_BLEND_CUSTOM = (9 << 11), + DRW_STATE_LOGIC_INVERT = (10 << 11), + DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), + + DRW_STATE_IN_FRONT_SELECT = (1 << 27), + DRW_STATE_SHADOW_OFFSET = (1 << 28), + DRW_STATE_CLIP_PLANES = (1 << 29), + DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), + /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ + DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), +} DRWState; + +ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); + +#define DRW_STATE_DEFAULT \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) +#define DRW_STATE_BLEND_ENABLED \ + (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ + DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ + DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) +#define DRW_STATE_RASTERIZER_ENABLED \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ + DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) +#define DRW_STATE_DEPTH_TEST_ENABLED \ + (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ + DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) +#define DRW_STATE_STENCIL_TEST_ENABLED \ + (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) +#define DRW_STATE_WRITE_STENCIL_ENABLED \ + (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name DRWState to GPU state conversion + * \{ */ + +static inline eGPUWriteMask to_write_mask(DRWState state) +{ + eGPUWriteMask write_mask = GPU_WRITE_NONE; + if (state & DRW_STATE_WRITE_DEPTH) { + write_mask |= GPU_WRITE_DEPTH; + } + if (state & DRW_STATE_WRITE_COLOR) { + write_mask |= GPU_WRITE_COLOR; + } + if (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + write_mask |= GPU_WRITE_STENCIL; + } + return write_mask; +} + +static inline eGPUFaceCullTest to_face_cull_test(DRWState state) +{ + switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) { + case DRW_STATE_CULL_BACK: + return GPU_CULL_BACK; + case 
DRW_STATE_CULL_FRONT: + return GPU_CULL_FRONT; + default: + return GPU_CULL_NONE; + } +} + +static inline eGPUDepthTest to_depth_test(DRWState state) +{ + switch (state & DRW_STATE_DEPTH_TEST_ENABLED) { + case DRW_STATE_DEPTH_LESS: + return GPU_DEPTH_LESS; + case DRW_STATE_DEPTH_LESS_EQUAL: + return GPU_DEPTH_LESS_EQUAL; + case DRW_STATE_DEPTH_EQUAL: + return GPU_DEPTH_EQUAL; + case DRW_STATE_DEPTH_GREATER: + return GPU_DEPTH_GREATER; + case DRW_STATE_DEPTH_GREATER_EQUAL: + return GPU_DEPTH_GREATER_EQUAL; + case DRW_STATE_DEPTH_ALWAYS: + return GPU_DEPTH_ALWAYS; + default: + return GPU_DEPTH_NONE; + } +} + +static inline eGPUStencilOp to_stencil_op(DRWState state) +{ + switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + case DRW_STATE_WRITE_STENCIL: + return GPU_STENCIL_OP_REPLACE; + case DRW_STATE_WRITE_STENCIL_SHADOW_PASS: + return GPU_STENCIL_OP_COUNT_DEPTH_PASS; + case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL: + return GPU_STENCIL_OP_COUNT_DEPTH_FAIL; + default: + return GPU_STENCIL_OP_NONE; + } +} + +static inline eGPUStencilTest to_stencil_test(DRWState state) +{ + switch (state & DRW_STATE_STENCIL_TEST_ENABLED) { + case DRW_STATE_STENCIL_ALWAYS: + return GPU_STENCIL_ALWAYS; + case DRW_STATE_STENCIL_EQUAL: + return GPU_STENCIL_EQUAL; + case DRW_STATE_STENCIL_NEQUAL: + return GPU_STENCIL_NEQUAL; + default: + return GPU_STENCIL_NONE; + } +} + +static inline eGPUBlend to_blend(DRWState state) +{ + switch (state & DRW_STATE_BLEND_ENABLED) { + case DRW_STATE_BLEND_ADD: + return GPU_BLEND_ADDITIVE; + case DRW_STATE_BLEND_ADD_FULL: + return GPU_BLEND_ADDITIVE_PREMULT; + case DRW_STATE_BLEND_ALPHA: + return GPU_BLEND_ALPHA; + case DRW_STATE_BLEND_ALPHA_PREMUL: + return GPU_BLEND_ALPHA_PREMULT; + case DRW_STATE_BLEND_BACKGROUND: + return GPU_BLEND_BACKGROUND; + case DRW_STATE_BLEND_OIT: + return GPU_BLEND_OIT; + case DRW_STATE_BLEND_MUL: + return GPU_BLEND_MULTIPLY; + case DRW_STATE_BLEND_SUB: + return GPU_BLEND_SUBTRACT; + case DRW_STATE_BLEND_CUSTOM: + return 
GPU_BLEND_CUSTOM; + case DRW_STATE_LOGIC_INVERT: + return GPU_BLEND_INVERT; + case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL: + return GPU_BLEND_ALPHA_UNDER_PREMUL; + default: + return GPU_BLEND_NONE; + } +} + +static inline eGPUProvokingVertex to_provoking_vertex(DRWState state) +{ + switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) { + case DRW_STATE_FIRST_VERTEX_CONVENTION: + return GPU_VERTEX_FIRST; + default: + return GPU_VERTEX_LAST; + } +} + +/** \} */ + +}; // namespace blender::draw + +#endif diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h index 2d9f4713feb..37b025e761d 100644 --- a/source/blender/draw/intern/draw_subdivision.h +++ b/source/blender/draw/intern/draw_subdivision.h @@ -177,6 +177,10 @@ typedef struct DRWSubdivCache { /* UBO to store settings for the various compute shaders. */ struct GPUUniformBuf *ubo; + + /* Extra flags, passed to the UBO. */ + bool is_edit_mode; + bool use_hide; } DRWSubdivCache; /* Only frees the data of the cache, caller is responsible to free the cache itself if necessary. 
@@ -195,6 +199,7 @@ void DRW_create_subdivision(struct Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide); diff --git a/source/blender/draw/intern/draw_texture_pool.cc b/source/blender/draw/intern/draw_texture_pool.cc index b36cb5c809e..017ecec7be2 100644 --- a/source/blender/draw/intern/draw_texture_pool.cc +++ b/source/blender/draw/intern/draw_texture_pool.cc @@ -160,6 +160,19 @@ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex) pool->tmp_tex_released.append(tmp_tex); } +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + pool->tmp_tex_acquired.remove_first_occurrence_and_reorder(tex); +} + +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + BLI_assert(pool->tmp_tex_acquired.first_index_of_try(tex) == -1 && + pool->tmp_tex_released.first_index_of_try(tex) == -1 && + pool->tmp_tex_pruned.first_index_of_try(tex) == -1); + pool->tmp_tex_acquired.append(tex); +} + void DRW_texture_pool_reset(DRWTexturePool *pool) { pool->last_user_id = -1; diff --git a/source/blender/draw/intern/draw_texture_pool.h b/source/blender/draw/intern/draw_texture_pool.h index 1c30ea88552..9fbbf630833 100644 --- a/source/blender/draw/intern/draw_texture_pool.h +++ b/source/blender/draw/intern/draw_texture_pool.h @@ -26,6 +26,7 @@ void DRW_texture_pool_free(DRWTexturePool *pool); /** * Try to find a texture corresponding to params into the texture pool. * If no texture was found, create one and add it to the pool. + * DEPRECATED: Use DRW_texture_pool_texture_acquire instead and do it just before rendering. */ GPUTexture *DRW_texture_pool_query( DRWTexturePool *pool, int width, int height, eGPUTextureFormat format, void *user); @@ -40,6 +41,22 @@ GPUTexture *DRW_texture_pool_texture_acquire(DRWTexturePool *pool, * Releases a previously acquired texture. 
*/ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex); + +/** + * This effectively remove a texture from the texture pool, giving full ownership to the caller. + * The given texture needs to be been acquired through DRW_texture_pool_texture_acquire(). + * IMPORTANT: This removes the need for a DRW_texture_pool_texture_release() call on this texture. + */ +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); +/** + * This Inserts a texture into the texture pool, giving full ownership to the texture pool. + * The texture needs not to be in the pool already. + * The texture may be reused in a latter call to DRW_texture_pool_texture_acquire(); + * IMPORTANT: DRW_texture_pool_texture_release() still needs to be called on this texture + * after usage. + */ +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); + /** * Resets the user bits for each texture in the pool and delete unused ones. */ diff --git a/source/blender/draw/intern/draw_view.c b/source/blender/draw/intern/draw_view.c index 817f97cbea4..35ff8891a0f 100644 --- a/source/blender/draw/intern/draw_view.c +++ b/source/blender/draw/intern/draw_view.c @@ -175,7 +175,7 @@ void DRW_draw_cursor(void) GPU_matrix_scale_2f(U.widget_unit, U.widget_unit); GPUBatch *cursor_batch = DRW_cache_cursor_get(is_aligned); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); @@ -241,7 +241,7 @@ void DRW_draw_cursor_2d_ex(const ARegion *region, const float cursor[2]) GPUBatch *cursor_batch = DRW_cache_cursor_get(true); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); diff --git 
a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc new file mode 100644 index 00000000000..cb0e1370c28 --- /dev/null +++ b/source/blender/draw/intern/draw_view.cc @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BLI_math_geom.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_debug.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +namespace blender::draw { + +void View::sync(const float4x4 &view_mat, const float4x4 &win_mat) +{ + data_.viewmat = view_mat; + data_.viewinv = view_mat.inverted(); + data_.winmat = win_mat; + data_.wininv = win_mat.inverted(); + data_.persmat = data_.winmat * data_.viewmat; + data_.persinv = data_.persmat.inverted(); + /* Should not be used anymore. */ + data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f); + + data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr())); + + update_view_vectors(); + + BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners); + BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere); + frustum_boundbox_calc(bound_box); + frustum_culling_planes_calc(); + frustum_culling_sphere_calc(bound_box, bound_sphere); + + dirty_ = true; +} + +void View::frustum_boundbox_calc(BoundBox &bbox) +{ + /* Extract the 8 corners from a Projection Matrix. */ +#if 0 /* Equivalent to this but it has accuracy problems. 
*/ + BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f),float3(1.0f)); + for (int i = 0; i < 8; i++) { + mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]); + } +#endif + + float left, right, bottom, top, near, far; + bool is_persp = data_.winmat[3][3] == 0.0f; + + projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far); + + bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near; + bbox.vec[0][0] = bbox.vec[3][0] = left; + bbox.vec[4][0] = bbox.vec[7][0] = right; + bbox.vec[0][1] = bbox.vec[4][1] = bottom; + bbox.vec[7][1] = bbox.vec[3][1] = top; + + /* Get the coordinates of the far plane. */ + if (is_persp) { + float sca_far = far / near; + left *= sca_far; + right *= sca_far; + bottom *= sca_far; + top *= sca_far; + } + + bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far; + bbox.vec[1][0] = bbox.vec[2][0] = left; + bbox.vec[6][0] = bbox.vec[5][0] = right; + bbox.vec[1][1] = bbox.vec[5][1] = bottom; + bbox.vec[2][1] = bbox.vec[6][1] = top; + + /* Transform into world space. */ + for (int i = 0; i < 8; i++) { + mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]); + } +} + +void View::frustum_culling_planes_calc() +{ + planes_from_projmat(data_.persmat.ptr(), + data_.frustum_planes[0], + data_.frustum_planes[5], + data_.frustum_planes[1], + data_.frustum_planes[3], + data_.frustum_planes[4], + data_.frustum_planes[2]); + + /* Normalize. */ + for (int p = 0; p < 6; p++) { + data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]); + } +} + +void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere) +{ + /* Extract Bounding Sphere */ + if (data_.winmat[3][3] != 0.0f) { + /* Orthographic */ + /* The most extreme points on the near and far plane. (normalized device coords). 
*/ + const float *nearpoint = bbox.vec[0]; + const float *farpoint = bbox.vec[6]; + + /* just use median point */ + mid_v3_v3v3(bsphere.center, farpoint, nearpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } + else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) { + /* Perspective with symmetrical frustum. */ + + /* We obtain the center and radius of the circumscribed circle of the + * isosceles trapezoid composed by the diagonals of the near and far clipping plane */ + + /* center of each clipping plane */ + float mid_min[3], mid_max[3]; + mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]); + mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]); + + /* square length of the diagonals of each clipping plane */ + float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]); + float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]); + + /* distance squared between clipping planes */ + float h_sq = len_squared_v3v3(mid_min, mid_max); + + float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq); + + /* The goal is to get the smallest sphere, + * not the sphere that passes through each corner */ + CLAMP(fac, 0.0f, 1.0f); + + interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac); + + /* distance from the center to one of the points of the far plane (1, 2, 5, 6) */ + bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]); + } + else { + /* Perspective with asymmetrical frustum. */ + + /* We put the sphere center on the line that goes from origin + * to the center of the far clipping plane. 
*/ + + /* Detect which of the corner of the far clipping plane is the farthest to the origin */ + float nfar[4]; /* most extreme far point in NDC space */ + float farxy[2]; /* far-point projection onto the near plane */ + float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */ + float nearpoint[3]; /* most extreme near point in camera coordinate */ + float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */ + float F = -1.0f, N; /* square distance of far and near point to origin */ + float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */ + float e, s; /* far and near clipping distance (<0) */ + float c; /* slope of center line = distance of far clipping center + * to z axis / far clipping distance. */ + float z; /* projection of sphere center on z axis (<0) */ + + /* Find farthest corner and center of far clip plane. */ + float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */ + for (int i = 0; i < 4; i++) { + float point[3]; + mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner); + float len = len_squared_v3(point); + if (len > F) { + copy_v3_v3(nfar, corner); + copy_v3_v3(farpoint, point); + F = len; + } + add_v3_v3(farcenter, point); + /* rotate by 90 degree to walk through the 4 points of the far clip plane */ + float tmp = corner[0]; + corner[0] = -corner[1]; + corner[1] = tmp; + } + + /* the far center is the average of the far clipping points */ + mul_v3_fl(farcenter, 0.25f); + /* the extreme near point is the opposite point on the near clipping plane */ + copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f); + mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar); + /* this is a frustum projection */ + N = len_squared_v3(nearpoint); + e = farpoint[2]; + s = nearpoint[2]; + /* distance to view Z axis */ + f = len_v2(farpoint); + /* get corresponding point on the near plane */ + mul_v2_v2fl(farxy, farpoint, s / e); + /* this formula preserve the sign of n */ + 
sub_v2_v2(nearpoint, farxy); + n = f * s / e - len_v2(nearpoint); + c = len_v2(farcenter) / e; + /* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */ + z = (F - N) / (2.0f * (e - s + c * (f - n))); + + bsphere.center[0] = farcenter[0] * z / e; + bsphere.center[1] = farcenter[1] * z / e; + bsphere.center[2] = z; + + /* For XR, the view matrix may contain a scale factor. Then, transforming only the center + * into world space after calculating the radius will result in incorrect behavior. */ + mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */ + mul_m4_v3(data_.viewinv.ptr(), farpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } +} + +void View::set_clip_planes(Span<float4> planes) +{ + BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes)); + int i = 0; + for (const auto &plane : planes) { + data_.clip_planes[i++] = plane; + } +} + +void View::update_viewport_size() +{ + float4 viewport; + GPU_viewport_size_get_f(viewport); + float2 viewport_size = float2(viewport.z, viewport.w); + if (assign_if_different(data_.viewport_size, viewport_size)) { + dirty_ = true; + } +} + +void View::update_view_vectors() +{ + bool is_persp = data_.winmat[3][3] == 0.0f; + + /* Near clip distance. */ + data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) : + -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + + /* Far clip distance. */ + data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) : + -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + + /* View vectors for the corners of the view frustum. 
+ * Can be used to recreate the world space position easily */ + float3 view_vecs[4] = { + {-1.0f, -1.0f, -1.0f}, + {1.0f, -1.0f, -1.0f}, + {-1.0f, 1.0f, -1.0f}, + {-1.0f, -1.0f, 1.0f}, + }; + + /* Convert the view vectors to view space */ + for (int i = 0; i < 4; i++) { + mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]); + /* Normalized trick see: + * http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */ + if (is_persp) { + view_vecs[i].x /= view_vecs[i].z; + view_vecs[i].y /= view_vecs[i].z; + } + } + + /** + * - If orthographic: + * `view_vecs[0]` is the near-bottom-left corner of the frustum and + * `view_vecs[1]` is the vector going from the near-bottom-left corner to + * the far-top-right corner. + * - If perspective: + * `view_vecs[0].xy` and `view_vecs[1].xy` are respectively the bottom-left corner + * when `Z = 1`, and top-left corner if `Z = 1`. + * `view_vecs[0].z` the near clip distance and `view_vecs[1].z` is the (signed) + * distance from the near plane to the far clip plane. + */ + copy_v3_v3(data_.viewvecs[0], view_vecs[0]); + + /* we need to store the differences */ + data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0]; + data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1]; + data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2]; +} + +void View::bind() +{ + update_viewport_size(); + + if (dirty_) { + dirty_ = false; + data_.push_update(); + } + + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); +} + +void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze) +{ + if (debug_freeze && frozen_ == false) { + data_freeze_ = static_cast<ViewInfos>(data_); + data_freeze_.push_update(); + } +#ifdef DEBUG + if (debug_freeze) { + drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1)); + } +#endif + frozen_ = debug_freeze; + + GPU_debug_group_begin("View.compute_visibility"); + + /* TODO(fclem): Early out if visibility hasn't changed. 
*/ + /* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. */ + visibility_buf_.resize(divide_ceil_u(resource_len, 128)); + + uint32_t data = 0xFFFFFFFFu; + GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data); + + if (do_visibility_) { + GPUShader *shader = DRW_shader_draw_visibility_compute_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len); + GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + if (frozen_) { + /* Bind back the non frozen data. */ + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); + } + + GPU_debug_group_end(); +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view.hh b/source/blender/draw/intern/draw_view.hh new file mode 100644 index 00000000000..27e7a7a0028 --- /dev/null +++ b/source/blender/draw/intern/draw_view.hh @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + */ + +#include "DRW_gpu_wrapper.hh" +#include "DRW_render.h" + +#include "draw_shader_shared.h" + +namespace blender::draw { + +class Manager; + +/* TODO: de-duplicate. */ +using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; +/** \note Using uint4 for declaration but bound as uint. */ +using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>; + +class View { + friend Manager; + + private: + UniformBuffer<ViewInfos> data_; + /** Frozen version of data_ used for debugging culling. */ + UniformBuffer<ViewInfos> data_freeze_; + /** Result of the visibility computation. 1 bit per resource ID. 
*/ + VisibilityBuf visibility_buf_; + + const char *debug_name_; + + bool do_visibility_ = true; + bool dirty_ = true; + bool frozen_ = false; + + public: + View(const char *name) : visibility_buf_(name), debug_name_(name){}; + /* For compatibility with old system. Will be removed at some point. */ + View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name) + { + float4x4 view_mat, win_mat; + DRW_view_viewmat_get(view, view_mat.ptr(), false); + DRW_view_winmat_get(view, win_mat.ptr(), false); + this->sync(view_mat, win_mat); + } + + void set_clip_planes(Span<float4> planes); + + void sync(const float4x4 &view_mat, const float4x4 &win_mat); + + bool is_persp() const + { + return data_.winmat[3][3] == 0.0f; + } + + bool is_inverted() const + { + return data_.is_inverted; + } + + float far_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f); + } + return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + } + + float near_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f); + } + return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + } + + private: + /** Called from draw manager. 
*/ + void bind(); + void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze); + + void update_view_vectors(); + void update_viewport_size(); + + void frustum_boundbox_calc(BoundBox &bbox); + void frustum_culling_planes_calc(); + void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere); +}; + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view_data.cc b/source/blender/draw/intern/draw_view_data.cc index 3dc28dc9a9a..58d826e0218 100644 --- a/source/blender/draw/intern/draw_view_data.cc +++ b/source/blender/draw/intern/draw_view_data.cc @@ -7,6 +7,7 @@ #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_viewport.h" #include "DRW_render.h" @@ -16,6 +17,7 @@ #include "draw_manager_text.h" #include "draw_manager.h" +#include "draw_manager.hh" #include "draw_view_data.h" using namespace blender; @@ -33,6 +35,22 @@ struct DRWViewData { Vector<ViewportEngineData> engines; Vector<ViewportEngineData *> enabled_engines; + + /** New per view/viewport manager. Null if not supported by current hardware. */ + draw::Manager *manager = nullptr; + + DRWViewData() + { + /* Only for GL >= 4.3 implementation for now. 
*/ + if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) { + manager = new draw::Manager(); + } + }; + + ~DRWViewData() + { + delete manager; + }; }; DRWViewData *DRW_view_data_create(ListBase *engine_types) @@ -197,6 +215,16 @@ void DRW_view_data_free_unused(DRWViewData *view_data) } } +void DRW_view_data_engines_view_update(DRWViewData *view_data) +{ + for (ViewportEngineData &engine_data : view_data->engines) { + DrawEngineType *draw_engine = engine_data.engine_type->draw_engine; + if (draw_engine->view_update) { + draw_engine->view_update(&engine_data); + } + } +} + double *DRW_view_data_cache_time_get(DRWViewData *view_data) { return &view_data->cache_time; @@ -227,3 +255,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it ViewportEngineData *engine = iterator->engines[iterator->id++]; return engine; } + +draw::Manager *DRW_manager_get() +{ + BLI_assert(DST.view_data_active->manager); + return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager); +} + +draw::ObjectRef DRW_object_ref_get(Object *object) +{ + BLI_assert(DST.view_data_active->manager); + return {object, DST.dupli_source, DST.dupli_parent}; +} + +void DRW_manager_begin_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync(); +} + +void DRW_manager_end_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync(); +} diff --git a/source/blender/draw/intern/draw_view_data.h b/source/blender/draw/intern/draw_view_data.h index 918b9e81f87..f2c34c15f08 100644 --- a/source/blender/draw/intern/draw_view_data.h +++ b/source/blender/draw/intern/draw_view_data.h @@ -107,6 +107,7 @@ ViewportEngineData *DRW_view_data_engine_data_get_ensure(DRWViewData *view_data, void DRW_view_data_use_engine(DRWViewData *view_data, struct DrawEngineType 
*engine_type); void DRW_view_data_reset(DRWViewData *view_data); void DRW_view_data_free_unused(DRWViewData *view_data); +void DRW_view_data_engines_view_update(DRWViewData *view_data); double *DRW_view_data_cache_time_get(DRWViewData *view_data); DefaultFramebufferList *DRW_view_data_default_framebuffer_list_get(DRWViewData *view_data); DefaultTextureList *DRW_view_data_default_texture_list_get(DRWViewData *view_data); diff --git a/source/blender/draw/intern/draw_volume.cc b/source/blender/draw/intern/draw_volume.cc index c4e58ab24cb..8f4383a98d8 100644 --- a/source/blender/draw/intern/draw_volume.cc +++ b/source/blender/draw/intern/draw_volume.cc @@ -89,6 +89,10 @@ void DRW_volume_free(void) static GPUTexture *grid_default_texture(eGPUDefaultValue default_value) { + if (g_data.dummy_one == nullptr) { + drw_volume_globals_init(); + } + switch (default_value) { case GPU_DEFAULT_0: return g_data.dummy_zero; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh index 8052b277d45..10b94291e35 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh @@ -29,7 +29,6 @@ struct DRWSubdivCache; enum eMRExtractType { MR_EXTRACT_BMESH, - MR_EXTRACT_MAPPED, MR_EXTRACT_MESH, }; @@ -81,11 +80,18 @@ struct MeshRenderData { BMFace *efa_act_uv; /* Data created on-demand (usually not for #BMesh based data). 
*/ MLoopTri *mlooptri; + const int *material_indices; const float (*vert_normals)[3]; const float (*poly_normals)[3]; + const bool *hide_vert; + const bool *hide_edge; + const bool *hide_poly; float (*loop_normals)[3]; int *lverts, *ledges; + const char *active_color_name; + const char *default_color_name; + struct { int *tri_first_index; int *mat_tri_len; @@ -93,6 +99,82 @@ struct MeshRenderData { } poly_sorted; }; +BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) +{ + if (me->edit_mesh != nullptr) { + Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); + if (editmesh_eval_final != nullptr) { + return editmesh_eval_final; + } + } + + return me; +} + +BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->ldata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->ldata; + break; + } + + BLI_assert(0); + return &me->ldata; +} + +BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->pdata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->pdata; + break; + } + + BLI_assert(0); + return &me->pdata; +} + +BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->edata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->edata; + break; + } + + BLI_assert(0); + return &me->edata; +} + +BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->vdata; + break; + case 
ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->vdata; + break; + } + + BLI_assert(0); + return &me->vdata; +} + BLI_INLINE BMFace *bm_original_face_get(const MeshRenderData *mr, int idx) { return ((mr->p_origindex != NULL) && (mr->p_origindex[idx] != ORIGINDEX_NONE) && mr->bm) ? diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc index 9824602b129..2f2e59c8c3b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc @@ -22,7 +22,7 @@ struct MeshExtract_EditUvElem_Data { }; static void extract_edituv_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -59,17 +59,15 @@ static void extract_edituv_tris_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - edituv_tri_add(data, - (mp->flag & ME_HIDE) != 0, - (mp->flag & ME_FACE_SEL) != 0, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2]); + const BMFace *efa = bm_original_face_get(mr, mlt->poly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + + edituv_tri_add(data, mp_hidden, mp_select, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -117,7 +115,7 @@ static void extract_edituv_tris_iter_subdiv_bm(const DRWSubdivCache *UNUSED(subd } static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(subdiv_cache), - const MeshRenderData *UNUSED(mr), + const MeshRenderData *mr, void *_data, uint subdiv_quad_index, const MPoly *coarse_quad) @@ -125,24 +123,17 @@ static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(su MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const uint loop_idx = subdiv_quad_index * 4; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 1, - loop_idx + 2); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 2, - loop_idx + 3); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 1, loop_idx + 2); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 2, loop_idx + 3); } static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -176,7 +167,7 @@ constexpr MeshExtract create_extractor_edituv_tris() * \{ */ static void extract_edituv_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -214,12 +205,24 @@ static void extract_edituv_lines_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; + + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, mp_index); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[mp_index] : false; + mp_select = (mp->flag & ME_FACE_SEL) != 0; + } + for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; @@ -227,16 +230,12 @@ static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_next = (ml_index == ml_index_last) ? 
mp->loopstart : (ml_index + 1); const bool real_edge = (mr->e_origindex == nullptr || mr->e_origindex[ml->e] != ORIGINDEX_NONE); - edituv_edge_add(data, - (mp->flag & ME_HIDE) != 0 || !real_edge, - (mp->flag & ME_FACE_SEL) != 0, - ml_index, - ml_index_next); + edituv_edge_add(data, mp_hidden || !real_edge, mp_select, ml_index, ml_index_next); } } static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -266,6 +265,9 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + const bool mp_hidden = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN); + const bool mp_select = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT); + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -274,8 +276,8 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN) != 0 || !real_edge, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -290,6 +292,17 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, coarse_poly - mr->mpoly); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[coarse_poly - mr->mpoly] : false; + mp_select = (coarse_poly->flag & ME_FACE_SEL) != 0; + } + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -298,8 +311,8 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - (coarse_poly->flag & ME_HIDE) != 0 || !real_edge, - (coarse_poly->flag & ME_FACE_SEL) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -307,7 +320,7 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -341,7 +354,7 @@ constexpr MeshExtract create_extractor_edituv_lines() * \{ */ static void extract_edituv_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -378,23 +391,27 @@ static void extract_edituv_points_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_points_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; const bool real_vert = !mr->v_origindex || mr->v_origindex[ml->v] != ORIGINDEX_NONE; - edituv_point_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_vert, (mp->flag & ME_FACE_SEL) != 0, ml_index); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, ml_index); } } static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -444,22 +461,23 @@ static void extract_edituv_points_iter_subdiv_mesh(const DRWSubdivCache *subdiv_ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint i = start_loop_idx; i < end_loop_idx; i++) { const int vert_origindex = subdiv_loop_vert_index[i]; const bool real_vert = !mr->v_origindex || (vert_origindex != -1 && mr->v_origindex[vert_origindex] != ORIGINDEX_NONE); - edituv_point_add(data, - ((coarse_quad->flag & ME_HIDE) != 0) || !real_vert, - (coarse_quad->flag & ME_FACE_SEL) != 0, - i); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, i); } } static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -493,7 +511,7 @@ constexpr MeshExtract create_extractor_edituv_points() * \{ */ static void extract_edituv_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -533,6 +551,11 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = mr->me->runtime.subsurf_face_dot_tags; @@ -543,21 +566,17 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); const bool subd_fdot = BLI_BITMAP_TEST(facedot_tags, ml->v); - edituv_facedot_add(data, - ((mp->flag & ME_HIDE) != 0) || !real_fdot || !subd_fdot, - (mp->flag & ME_FACE_SEL) != 0, - mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot || !subd_fdot, mp_select, mp_index); } } else { const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); - edituv_facedot_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_fdot, (mp->flag & ME_FACE_SEL) != 0, mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot, mp_select, mp_index); } } static void extract_edituv_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc index 4eebea1b79f..8dc00617039 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc @@ -15,7 +15,7 @@ namespace blender::draw { * \{ */ static void extract_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -42,6 +42,8 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *_userdata) { + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata); if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = 
mr->me->runtime.subsurf_face_dot_tags; @@ -50,7 +52,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; - if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !(mr->use_hide && (mp->flag & ME_HIDE))) { + if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); return; } @@ -58,7 +60,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, GPU_indexbuf_set_point_restart(elb, mp_index); } else { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { + if (!hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); } else { @@ -68,7 +70,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, } static void extract_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 4e89b34c0a0..9c564c2cdda 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -18,7 +18,7 @@ namespace blender::draw { * \{ */ static void extract_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -58,16 +58,13 @@ static void extract_lines_iter_poly_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); /* Using poly & loop iterator would complicate accessing the adjacent loop. 
*/ const MLoop *mloop = mr->mloop; - const MEdge *medge = mr->medge; - if (mr->use_hide || (mr->extract_type == MR_EXTRACT_MAPPED) || (mr->e_origindex != nullptr)) { + if (mr->use_hide || (mr->e_origindex != nullptr)) { const int ml_index_last = mp->loopstart + (mp->totloop - 1); int ml_index = ml_index_last, ml_index_next = mp->loopstart; do { const MLoop *ml = &mloop[ml_index]; - const MEdge *med = &medge[ml->e]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[ml->e]) || + ((mr->e_origindex) && (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_line_verts(elb, ml->e, ml_index, ml_index_next); } else { @@ -111,9 +108,8 @@ static void extract_lines_iter_ledge_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); const int l_index_offset = mr->edge_len + ledge_index; const int e_index = mr->ledges[ledge_index]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[med - mr->medge]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int l_index = mr->loop_len + ledge_index * 2; GPU_indexbuf_set_line_verts(elb, l_index_offset, l_index, l_index + 1); } @@ -132,7 +128,7 @@ static void extract_lines_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *data) { @@ -143,7 +139,7 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache 
*UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -183,17 +179,54 @@ static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, uint *flags_data = static_cast<uint *>(GPU_vertbuf_get_data(flags)); - if (mr->extract_type == MR_EXTRACT_MESH) { - const MEdge *medge = mr->medge; - for (DRWSubdivLooseEdge edge : loose_edges) { - *flags_data++ = (medge[edge.coarse_edge_index].flag & ME_HIDE) != 0; + switch (mr->extract_type) { + case MR_EXTRACT_MESH: { + if (mr->e_origindex == nullptr) { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + else { + if (mr->bm) { + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = bm_original_edge_get(mr, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + } + else { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + int e = edge.coarse_edge_index; + + if (mr->e_origindex && mr->e_origindex[e] != ORIGINDEX_NONE) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + else { + *flags_data++ = false; + } + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + } + break; } - } - else { - BMesh *bm = mr->bm; - for (DRWSubdivLooseEdge edge : loose_edges) { - const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); - *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + case MR_EXTRACT_BMESH: { + BMesh *bm = mr->bm; + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + break; } } @@ -229,7 +262,7 @@ constexpr MeshExtract create_extractor_lines() /** \name 
Extract Lines and Loose Edges Sub Buffer * \{ */ -static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshBatchCache *cache) +static void extract_lines_loose_subbuffer(const MeshRenderData *mr, MeshBatchCache *cache) { BLI_assert(cache->final.buff.ibo.lines); /* Multiply by 2 because these are edges indices. */ @@ -241,7 +274,7 @@ static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshB } static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *data) { @@ -253,7 +286,7 @@ static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, static void extract_lines_with_lines_loose_finish_subdiv(const struct DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *UNUSED(buf), void *UNUSED(_data)) { @@ -292,7 +325,7 @@ constexpr MeshExtract create_extractor_lines_with_lines_loose() * \{ */ static void extract_lines_loose_only_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -303,7 +336,7 @@ static void extract_lines_loose_only_init(const MeshRenderData *mr, static void extract_lines_loose_only_init_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc index 9ba9453dada..d6c246c51a9 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc @@ -42,7 +42,7 @@ static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data, } static void 
extract_lines_adjacency_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -119,20 +119,21 @@ static void extract_lines_adjacency_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, - mr->mloop[mlt->tri[1]].v, - mr->mloop[mlt->tri[2]].v, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2], - data); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + return; } + lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, + mr->mloop[mlt->tri[1]].v, + mr->mloop[mlt->tri[2]].v, + mlt->tri[0], + mlt->tri[1], + mlt->tri[2], + data); } static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -166,7 +167,7 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -222,7 +223,7 @@ static void extract_lines_adjacency_iter_subdiv_mesh(const DRWSubdivCache *subdi static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc index 713a533492f..31e5c515129 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc +++ 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc @@ -26,7 +26,7 @@ struct MeshExtract_LinePaintMask_Data { }; static void extract_lines_paint_mask_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -47,10 +47,8 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, const MLoop *ml = &mloop[ml_index]; const int e_index = ml->e; - const MEdge *me = &mr->medge[e_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[e_index]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int ml_index_last = mp->totloop + mp->loopstart - 1; const int ml_index_other = (ml_index == ml_index_last) ? mp->loopstart : (ml_index + 1); @@ -78,7 +76,7 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, } static void extract_lines_paint_mask_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -122,11 +120,10 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd GPU_indexbuf_set_line_restart(&data->elb, subdiv_edge_index); } else { - const MEdge *me = &mr->medge[coarse_edge_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { - const uint ml_index_other = (loop_idx == end_loop_idx) ? start_loop_idx : loop_idx + 1; + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[coarse_edge_index]) || + ((mr->e_origindex) && (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { + const uint ml_index_other = (loop_idx == (end_loop_idx - 1)) ? 
start_loop_idx : + loop_idx + 1; if (coarse_quad->flag & ME_FACE_SEL) { if (BLI_BITMAP_TEST_AND_SET_ATOMIC(data->select_map, coarse_edge_index)) { /* Hide edge as it has more than 2 selected loop. */ @@ -154,7 +151,7 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd static void extract_lines_paint_mask_finish_subdiv( const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index e746b37fd30..48eeb86e5ee 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -19,7 +19,7 @@ namespace blender::draw { * \{ */ static void extract_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -43,10 +43,9 @@ BLI_INLINE void vert_set_mesh(GPUIndexBufBuilder *elb, const int v_index, const int l_index) { - const MVert *mv = &mr->mvert[v_index]; - if (!((mr->use_hide && (mv->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { + const bool hidden = mr->use_hide && mr->hide_vert && mr->hide_vert[v_index]; + + if (!(hidden || ((mr->v_origindex) && (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_point_vert(elb, v_index, l_index); } else { @@ -131,7 +130,7 @@ static void extract_points_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { @@ -142,7 +141,7 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr), 
static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buffer), void *data) { @@ -181,8 +180,7 @@ static void extract_points_iter_subdiv_common(GPUIndexBufBuilder *elb, } } else { - const MVert *mv = &mr->mvert[coarse_vertex_index]; - if (mr->use_hide && (mv->flag & ME_HIDE)) { + if (mr->use_hide && mr->hide_vert && mr->hide_vert[coarse_vertex_index]) { GPU_indexbuf_set_point_restart(elb, coarse_vertex_index); continue; } @@ -285,7 +283,7 @@ static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 4c8d1d0002a..2e3e6c7b6b1 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -25,7 +25,7 @@ static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_fro * \{ */ static void extract_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -81,7 +81,7 @@ static void extract_tris_iter_poly_mesh(const MeshRenderData *mr, } static void extract_tris_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -111,7 +111,7 @@ static void extract_tris_finish(const MeshRenderData *mr, static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ 
-157,7 +157,7 @@ constexpr MeshExtract create_extractor_tris() * \{ */ static void extract_tris_single_mat_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -189,17 +189,17 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + GPU_indexbuf_set_tri_restart(elb, mlt_index); } else { - GPU_indexbuf_set_tri_restart(elb, mlt_index); + GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } } static void extract_tris_single_mat_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc index fb6b5e1904b..64ade020418 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc @@ -9,6 +9,7 @@ #include <functional> +#include "BLI_color.hh" #include "BLI_math_vec_types.hh" #include "BLI_string.h" @@ -57,7 +58,6 @@ template<typename AttributeType, typename VBOType> struct AttributeTypeConverter } }; -/* Similar to the one in #extract_mesh_vcol_vbo.cc */ struct gpuMeshCol { ushort r, g, b, a; }; @@ -74,6 +74,18 @@ template<> struct AttributeTypeConverter<MPropCol, gpuMeshCol> { } }; +template<> struct AttributeTypeConverter<ColorGeometry4b, gpuMeshCol> { + static gpuMeshCol convert_value(ColorGeometry4b value) + { + gpuMeshCol result; + result.r = 
unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.r]); + result.g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.g]); + result.b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.b]); + result.a = unit_float_to_ushort_clamp(value.a * (1.0f / 255.0f)); + return result; + } +}; + /* Return the number of component for the attribute's value type, or 0 if is it unsupported. */ static uint gpu_component_size_for_attribute_type(eCustomDataType type) { @@ -90,6 +102,7 @@ static uint gpu_component_size_for_attribute_type(eCustomDataType type) case CD_PROP_FLOAT3: return 3; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return 4; default: return 0; @@ -102,6 +115,7 @@ static GPUVertFetchMode get_fetch_mode_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_FETCH_INT_TO_FLOAT; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_FETCH_INT_TO_FLOAT_UNIT; default: return GPU_FETCH_FLOAT; @@ -114,13 +128,14 @@ static GPUVertCompType get_comp_type_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_COMP_I32; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_COMP_U16; default: return GPU_COMP_F32; } } -static void init_vbo_for_attribute(const MeshRenderData *mr, +static void init_vbo_for_attribute(const MeshRenderData &mr, GPUVertBuf *vbo, const DRW_AttributeRequest &request, bool build_on_device, @@ -132,11 +147,8 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, /* We should not be here if the attribute type is not supported. 
*/ BLI_assert(comp_size != 0); - const CustomData *custom_data = get_custom_data_for_domain(mr, request.domain); char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - const char *layer_name = CustomData_get_layer_name( - custom_data, request.cd_type, request.layer_index); - GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); + GPU_vertformat_safe_attr_name(request.attribute_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); /* Attributes use auto-name. */ BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); @@ -144,6 +156,13 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, GPU_vertformat_deinterleave(&format); GPU_vertformat_attr_add(&format, attr_name, comp_type, comp_size, fetch_mode); + if (mr.active_color_name && STREQ(request.attribute_name, mr.active_color_name)) { + GPU_vertformat_alias_add(&format, "ac"); + } + if (mr.default_color_name && STREQ(request.attribute_name, mr.default_color_name)) { + GPU_vertformat_alias_add(&format, "c"); + } + if (build_on_device) { GPU_vertbuf_init_build_on_device(vbo, &format, len); } @@ -258,18 +277,15 @@ static void extract_attr_generic(const MeshRenderData *mr, } } -static void extract_attr_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data), - int index) +static void extract_attr_init( + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *UNUSED(tls_data), int index) { const DRW_Attributes *attrs_used = &cache->attr_used; const DRW_AttributeRequest &request = attrs_used->requests[index]; GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); + init_vbo_for_attribute(*mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); /* TODO(@kevindietrich): float3 is used for scalar attributes as the implicit conversion done by * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. 
However, following the @@ -297,6 +313,9 @@ static void extract_attr_init(const MeshRenderData *mr, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, vbo, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, vbo, request); + break; default: BLI_assert_unreachable(); } @@ -345,17 +364,24 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, src_data, request); + break; default: BLI_assert_unreachable(); } GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); + init_vbo_for_attribute(*mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); /* Ensure data is uploaded properly. */ GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data( - subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0, false); + draw_subdiv_interp_custom_data(subdiv_cache, + src_data, + dst_buffer, + static_cast<int>(dimensions), + 0, + ELEM(request.cd_type, CD_PROP_COLOR, CD_PROP_BYTE_COLOR)); GPU_vertbuf_discard(src_data); } @@ -364,13 +390,13 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, * extract. The overall API does not allow us to pass this in a convenient way. 
*/ #define EXTRACT_INIT_WRAPPER(index) \ static void extract_attr_init##index( \ - const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \ + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *tls_data) \ { \ extract_attr_init(mr, cache, buf, tls_data, index); \ } \ static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \ const MeshRenderData *mr, \ - struct MeshBatchCache *cache, \ + MeshBatchCache *cache, \ void *buf, \ void *tls_data) \ { \ diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index a11f740239a..50c37f6397c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -43,7 +43,7 @@ static float loop_edge_factor_get(const float f_no[3], } static void extract_edge_fac_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -167,14 +167,14 @@ static void extract_edge_fac_iter_ledge_mesh(const MeshRenderData *mr, } static void extract_edge_fac_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data); - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { /* Some AMD drivers strangely crash with VBO's with a one byte format. * To workaround we reinitialize the VBO with another format and convert * all bytes to floats. 
*/ @@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() { static GPUVertFormat format = {0}; if (format.attr_len == 0) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } else { @@ -218,7 +218,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach uint offset = subdiv_cache->num_subdiv_loops; for (int i = 0; i < loose_geom.edge_len; i++) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { float loose_edge_fac[2] = {1.0f, 1.0f}; GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc index 3bb706e82cd..27fd6546b8c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc @@ -112,7 +112,7 @@ static GPUVertFormat *get_edit_data_format() } static void extract_edit_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc index 6d54fce2a0d..0b9043e3289 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc @@ -43,7 
+43,7 @@ static void extract_edituv_data_init_common(const MeshRenderData *mr, } static void extract_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc index 5d6dd14b57a..e4714aabf34 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc @@ -74,7 +74,7 @@ static void edituv_get_edituv_stretch_angle(float auv[2][2], } static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -97,7 +97,7 @@ static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->ldata, CD_MLOOPUV); } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); data->luv = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); } } @@ -212,7 +212,7 @@ static GPUVertFormat *get_edituv_stretch_angle_format_subdiv() static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc index 70dcc24f946..9679c0523f8 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc @@ -20,14 +20,14 @@ namespace 
blender::draw { * \{ */ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_I16, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } GPU_vertbuf_init_with_format(vbo, &format); @@ -37,15 +37,14 @@ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, BLI_INLINE float area_ratio_get(float area, float uvarea) { if (area >= FLT_EPSILON && uvarea >= FLT_EPSILON) { - /* Tag inversion by using the sign. */ - return (area > uvarea) ? (uvarea / area) : -(area / uvarea); + return uvarea / area; } return 0.0f; } -BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio) +BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio) { - ratio *= (ratio > 0.0f) ? tot_ratio : -inv_tot_ratio; + ratio *= tot_ratio; return (ratio > 1.0f) ? 
(1.0f / ratio) : ratio; } @@ -72,7 +71,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MLoopUV *uv_data = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); const MPoly *mp = mr->mpoly; for (int mp_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { @@ -89,7 +88,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(data)) { @@ -97,14 +96,8 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area); - /* Convert in place to avoid an extra allocation */ - uint16_t *poly_stretch = (uint16_t *)area_ratio; - for (int mp_index = 0; mp_index < mr->poly_len; mp_index++) { - poly_stretch[mp_index] = area_ratio[mp_index] * SHRT_MAX; - } - /* Copy face data for each loop. 
*/ - uint16_t *loop_stretch = (uint16_t *)GPU_vertbuf_get_data(vbo); + float *loop_stretch = (float *)GPU_vertbuf_get_data(vbo); if (mr->extract_type == MR_EXTRACT_BMESH) { BMFace *efa; @@ -112,16 +105,16 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, int f, l_index = 0; BM_ITER_MESH_INDEX (efa, &f_iter, mr->bm, BM_FACES_OF_MESH, f) { for (int i = 0; i < efa->len; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[f]; + loop_stretch[l_index] = area_ratio[f]; } } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MPoly *mp = mr->mpoly; for (int mp_index = 0, l_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { for (int i = 0; i < mp->totloop; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[mp_index]; + loop_stretch[l_index] = area_ratio[mp_index]; } } } @@ -131,7 +124,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc index 64bec0adad4..27d1975d67b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc @@ -21,7 +21,7 @@ struct MeshExtract_EditUVFdotData_Data { }; static void extract_fdots_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc index 
8d189db9f12..c47cde63630 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc @@ -19,7 +19,7 @@ namespace blender::draw { #define NOR_AND_FLAG_HIDDEN -2 static void extract_fdots_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -34,7 +34,7 @@ static void extract_fdots_nor_init(const MeshRenderData *mr, } static void extract_fdots_nor_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -48,8 +48,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -66,8 +65,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -101,7 +99,7 @@ constexpr MeshExtract create_extractor_fdots_nor() * \{ */ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ 
-116,7 +114,7 @@ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, } static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -130,8 +128,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } @@ -148,8 +145,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc index 822b5928c49..c391cb6ca5a 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc @@ -36,7 +36,7 @@ static GPUVertFormat *get_fdots_nor_format_subdiv() } static void extract_fdots_pos_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -101,7 +101,7 @@ static void 
extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr, static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc index de21c63e5fd..b0403cf7c4c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc @@ -22,7 +22,7 @@ struct MeshExtract_FdotUV_Data { }; static void extract_fdots_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc index 42a9a58bbe4..01d07fa5f83 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc @@ -16,7 +16,7 @@ namespace blender::draw { * \{ */ static void extract_lnor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -62,6 +62,8 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -78,10 +80,10 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. 
+ * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. */ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { @@ -105,7 +107,7 @@ static GPUVertFormat *get_subdiv_lnor_format() static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -141,7 +143,7 @@ struct gpuHQNor { }; static void extract_lnor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -185,6 +187,8 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -201,10 +205,10 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use #MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. + * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. 
*/ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc index b57e2f6b807..fe2a02b6b63 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc @@ -23,7 +23,7 @@ namespace blender::draw { * \{ */ static void extract_mesh_analysis_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -259,7 +259,8 @@ static void statvis_calc_thickness(const MeshRenderData *mr, float *r_thickness) } struct BVHTree_OverlapData { - const Mesh *me; + const MVert *verts; + const MLoop *loops; const MLoopTri *mlooptri; float epsilon; }; @@ -267,7 +268,6 @@ struct BVHTree_OverlapData { static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED(thread)) { struct BVHTree_OverlapData *data = static_cast<struct BVHTree_OverlapData *>(userdata); - const Mesh *me = data->me; const MLoopTri *tri_a = &data->mlooptri[index_a]; const MLoopTri *tri_b = &data->mlooptri[index_b]; @@ -276,12 +276,12 @@ static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED( return false; } - const float *tri_a_co[3] = {me->mvert[me->mloop[tri_a->tri[0]].v].co, - me->mvert[me->mloop[tri_a->tri[1]].v].co, - me->mvert[me->mloop[tri_a->tri[2]].v].co}; - const float *tri_b_co[3] = {me->mvert[me->mloop[tri_b->tri[0]].v].co, - me->mvert[me->mloop[tri_b->tri[1]].v].co, - me->mvert[me->mloop[tri_b->tri[2]].v].co}; + const float *tri_a_co[3] = 
{data->verts[data->loops[tri_a->tri[0]].v].co, + data->verts[data->loops[tri_a->tri[1]].v].co, + data->verts[data->loops[tri_a->tri[2]].v].co}; + const float *tri_b_co[3] = {data->verts[data->loops[tri_b->tri[0]].v].co, + data->verts[data->loops[tri_b->tri[1]].v].co, + data->verts[data->loops[tri_b->tri[2]].v].co}; float ix_pair[2][3]; int verts_shared = 0; @@ -342,7 +342,8 @@ static void statvis_calc_intersect(const MeshRenderData *mr, float *r_intersect) BVHTree *tree = BKE_bvhtree_from_mesh_get(&treeData, mr->me, BVHTREE_FROM_LOOPTRI, 4); struct BVHTree_OverlapData data = {nullptr}; - data.me = mr->me; + data.verts = mr->mvert; + data.loops = mr->mloop; data.mlooptri = mr->mlooptri; data.epsilon = BLI_bvhtree_get_epsilon(tree); @@ -587,7 +588,7 @@ static void statvis_calc_sharp(const MeshRenderData *mr, float *r_sharp) } static void extract_analysis_iter_finish_mesh(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc index 68d838e9e62..4fcbdb1fc7c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc @@ -19,7 +19,7 @@ struct MeshExtract_Orco_Data { }; static void extract_orco_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc index 313744bdd27..a822845c688 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc @@ -28,7 +28,7 @@ struct MeshExtract_PosNor_Data { }; static void 
extract_pos_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -83,10 +83,11 @@ static void extract_pos_nor_iter_poly_bm(const MeshRenderData *mr, static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_PosNor_Data *data = static_cast<MeshExtract_PosNor_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp_index]; const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; @@ -95,12 +96,12 @@ static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, PosNorLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; copy_v3_v3(vert->pos, mv->co); vert->nor = data->normals[ml->v].low; /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor.w = -1; } else if (mv->flag & SELECT) { @@ -171,7 +172,7 @@ static void extract_pos_nor_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -201,7 +202,7 @@ static GPUVertFormat *get_custom_normals_format() static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -372,7 +373,7 @@ struct MeshExtract_PosNorHQ_Data { }; static void extract_pos_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + 
MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -432,20 +433,22 @@ static void extract_pos_nor_hq_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_PosNorHQ_Data *data = static_cast<MeshExtract_PosNorHQ_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; PosNorHQLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; copy_v3_v3(vert->pos, mv->co); copy_v3_v3_short(vert->nor, data->normals[ml->v].high); /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor[3] = -1; } else if (mv->flag & SELECT) { @@ -521,7 +524,7 @@ static void extract_pos_nor_hq_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_hq_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc index 0d959e324f8..6202fdd312d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc @@ -9,6 +9,7 @@ #include "BLI_string.h" +#include "BKE_mesh.h" #include "BKE_paint.h" #include "draw_subdivision.h" @@ -31,7 +32,7 @@ static GPUVertFormat *get_sculpt_data_format() } static void extract_sculpt_data_init(const 
MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -113,7 +114,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -128,6 +129,9 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, GPUVertBuf *subdiv_mask_vbo = nullptr; const float *cd_mask = (const float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + const Span<MLoop> coarse_loops = coarse_mesh->loops(); + if (cd_mask) { GPUVertFormat mask_format = {0}; GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); @@ -138,11 +142,11 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo)); for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const MPoly *mpoly = &coarse_polys[i]; for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; loop_index++) { - const MLoop *ml = &coarse_mesh->mloop[loop_index]; + const MLoop *ml = &coarse_loops[loop_index]; *v_mask++ = cd_mask[ml->v]; } } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc index 6230e1974be..9e0d171c9e4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc @@ -30,7 +30,7 @@ static void extract_select_idx_init_impl(const MeshRenderData *UNUSED(mr), } static void extract_select_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), 
+ MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -366,7 +366,7 @@ constexpr MeshExtract create_extractor_vert_idx() } static void extract_fdot_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc index a275f247cad..f7655658bdd 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc @@ -19,7 +19,7 @@ struct SkinRootData { }; static void extract_skin_roots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc index 83453d6ef38..049fa416523 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc @@ -25,7 +25,7 @@ namespace blender::draw { * \{ */ static void extract_tan_init_common(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertFormat *format, GPUVertCompType comp_type, GPUVertFetchMode fetch_mode, @@ -161,7 +161,7 @@ static void extract_tan_init_common(const MeshRenderData *mr, } static void extract_tan_ex_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertBuf *vbo, const bool do_hq) { @@ -235,7 +235,7 @@ static void extract_tan_ex_init(const MeshRenderData *mr, } static void extract_tan_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -254,7 +254,7 @@ static GPUVertFormat *get_coarse_tan_format() static void 
extract_tan_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -344,7 +344,7 @@ constexpr MeshExtract create_extractor_tan() * \{ */ static void extract_tan_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index ddb8ed9b25b..6606912850d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -19,7 +19,7 @@ namespace blender::draw { /* Initialize the vertex format to be used for UVs. Return true if any UV layer is * found, false otherwise. */ static bool mesh_extract_uv_format_init(GPUVertFormat *format, - struct MeshBatchCache *cache, + MeshBatchCache *cache, CustomData *cd_ldata, eMRExtractType extract_type, uint32_t &r_uv_layers) @@ -72,7 +72,7 @@ static bool mesh_extract_uv_format_init(GPUVertFormat *format, } static void extract_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -120,7 +120,7 @@ static void extract_uv_init(const MeshRenderData *mr, static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc deleted file mode 100644 index 84ab20f8f90..00000000000 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc +++ /dev/null @@ -1,387 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2021 Blender Foundation. 
All rights reserved. */ - -/** \file - * \ingroup draw - */ - -#include "MEM_guardedalloc.h" - -#include "BKE_attribute.h" -#include "BLI_string.h" -#include "BLI_vector.hh" - -#include "draw_subdivision.h" -#include "extract_mesh.hh" - -namespace blender::draw { - -struct VColRef { - const CustomDataLayer *layer; - eAttrDomain domain; -}; - -/** Get all vcol layers as AttributeRefs. - * - * \param vcol_layers: bitmask to filter vcol layers by, each bit - * corresponds to the integer position of the attribute - * within the global color attribute list. - */ -static Vector<VColRef> get_vcol_refs(const CustomData *cd_vdata, - const CustomData *cd_ldata, - const uint vcol_layers) -{ - Vector<VColRef> refs; - uint layeri = 0; - - auto buildList = [&](const CustomData *cdata, eAttrDomain domain) { - for (int i = 0; i < cdata->totlayer; i++) { - const CustomDataLayer *layer = cdata->layers + i; - - if (!(CD_TYPE_AS_MASK(layer->type) & CD_MASK_COLOR_ALL)) { - continue; - } - - if (layer->flag & CD_FLAG_TEMPORARY) { - continue; - } - - if (!(vcol_layers & (1UL << layeri))) { - layeri++; - continue; - } - - VColRef ref = {}; - ref.domain = domain; - ref.layer = layer; - - refs.append(ref); - layeri++; - } - }; - - buildList(cd_vdata, ATTR_DOMAIN_POINT); - buildList(cd_ldata, ATTR_DOMAIN_CORNER); - - return refs; -} - -/* ---------------------------------------------------------------------- */ -/** \name Extract VCol - * \{ */ - -/* Initialize the common vertex format for vcol for coarse and subdivided meshes. 
*/ -static void init_vcol_format(GPUVertFormat *format, - const MeshBatchCache *cache, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const CustomDataLayer *active, - const CustomDataLayer *render) -{ - GPU_vertformat_deinterleave(format); - - const uint32_t vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - - GPU_vertformat_safe_attr_name(ref.layer->name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); - - /* VCol layer name. */ - BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - - /* Active layer name. */ - if (ref.layer == active) { - GPU_vertformat_alias_add(format, "ac"); - } - - /* Active render layer name. */ - if (ref.layer == render) { - GPU_vertformat_alias_add(format, "c"); - } - } -} - -/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision - * case. */ -static GPUVertFormat *get_coarse_vcol_format() -{ - static GPUVertFormat format = {0}; - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - GPU_vertformat_alias_add(&format, "c"); - GPU_vertformat_alias_add(&format, "ac"); - } - return &format; -} - -struct gpuMeshVcol { - ushort r, g, b, a; -}; - -static void extract_vcol_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data)) -{ - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - - const CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->vdata : - &mr->me->vdata; - const CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? 
&mr->bm->ldata : - &mr->me->ldata; - - Mesh me_query = blender::dna::shallow_zero_initialize(); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - const uint32_t vcol_layers = cache->cd_used.vcol; - init_vcol_format(&format, cache, cd_vdata, cd_ldata, active_color, render_color); - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, mr->loop_len); - - gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo); - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? cd_vdata : cd_ldata; - - if (mr->extract_type == MR_EXTRACT_BMESH) { - int cd_ofs = ref.layer->offset; - - if (cd_ofs == -1) { - vcol_data += ref.domain == ATTR_DOMAIN_POINT ? mr->bm->totvert : mr->bm->totloop; - continue; - } - - BMIter iter; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - const bool is_point = ref.domain == ATTR_DOMAIN_POINT; - - BMFace *f; - BM_ITER_MESH (f, &iter, mr->bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - do { - const BMElem *elem = is_point ? 
reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - if (is_byte) { - const MLoopCol *mloopcol = (const MLoopCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]); - vcol_data->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f)); - vcol_data++; - } - else { - const MPropCol *mpcol = (const MPropCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(mpcol->color[0]); - vcol_data->g = unit_float_to_ushort_clamp(mpcol->color[1]); - vcol_data->b = unit_float_to_ushort_clamp(mpcol->color[2]); - vcol_data->a = unit_float_to_ushort_clamp(mpcol->color[3]); - vcol_data++; - } - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - int totloop = mr->loop_len; - const int idx = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - const MLoop *mloop = mr->mloop; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[idx].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[idx].data); - } - - const bool is_corner = ref.domain == ATTR_DOMAIN_CORNER; - - for (int i = 0; i < totloop; i++, mloop++) { - const int v_i = is_corner ? 
i : mloop->v; - - if (mcol) { - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].b]); - vcol_data->a = unit_float_to_ushort_clamp(mcol[v_i].a * (1.0f / 255.0f)); - vcol_data++; - } - else if (pcol) { - vcol_data->r = unit_float_to_ushort_clamp(pcol[v_i].color[0]); - vcol_data->g = unit_float_to_ushort_clamp(pcol[v_i].color[1]); - vcol_data->b = unit_float_to_ushort_clamp(pcol[v_i].color[2]); - vcol_data->a = unit_float_to_ushort_clamp(pcol[v_i].color[3]); - vcol_data++; - } - } - } - } -} - -static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache, - const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buffer, - void *UNUSED(data)) -{ - GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - const Mesh *coarse_mesh = subdiv_cache->mesh; - - bool extract_bmesh = mr->extract_type == MR_EXTRACT_BMESH; - - const CustomData *cd_vdata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->vdata : - &coarse_mesh->vdata; - const CustomData *cd_ldata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->ldata : - &coarse_mesh->ldata; - const int totloop = extract_bmesh ? 
coarse_mesh->edit_mesh->bm->totloop : coarse_mesh->totloop; - - Mesh me_query = blender::dna::shallow_copy(*coarse_mesh); - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - GPUVertFormat format = {0}; - init_vcol_format( - &format, cache, &coarse_mesh->vdata, &coarse_mesh->ldata, active_color, render_color); - - GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops); - - GPUVertBuf *src_data = GPU_vertbuf_calloc(); - /* Dynamic as we upload and interpolate layers one at a time. */ - GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC); - - GPU_vertbuf_data_alloc(src_data, totloop); - - gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data); - - const uint vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in - * a single buffer. */ - int pack_layer_index = 0; - for (const VColRef &ref : refs) { - /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */ - const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++; - - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata; - int layer_i = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - if (layer_i == -1) { - printf("%s: missing color layer %s\n", __func__, ref.layer->name); - continue; - } - - gpuMeshVcol *vcol = mesh_vcol; - - const bool is_vert = ref.domain == ATTR_DOMAIN_POINT; - - if (extract_bmesh) { - BMesh *bm = coarse_mesh->edit_mesh->bm; - BMIter iter; - BMFace *f; - int cd_ofs = cdata->layers[layer_i].offset; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - - BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - - do { - const BMElem *elem = is_vert ? reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - - if (is_byte) { - const MLoopCol *mcol2 = static_cast<const MLoopCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->b]); - vcol->a = unit_float_to_ushort_clamp(mcol2->a * (1.0f / 255.0f)); - } - else { - const MPropCol *pcol2 = static_cast<const MPropCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(pcol2->color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol2->color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol2->color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol2->color[3]); - } - - vcol++; - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - const MLoop *ml = coarse_mesh->mloop; - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[layer_i].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[layer_i].data); - } - - for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, ml++) { - int idx = is_vert ? 
ml->v : ml_index; - - if (mcol) { - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].b]); - vcol->a = unit_float_to_ushort_clamp(mcol[idx].a * (1.0f / 255.0f)); - } - else if (pcol) { - vcol->r = unit_float_to_ushort_clamp(pcol[idx].color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol[idx].color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol[idx].color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol[idx].color[3]); - } - } - } - - /* Ensure data is uploaded properly. */ - GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset, true); - } - - GPU_vertbuf_discard(src_data); -} - -constexpr MeshExtract create_extractor_vcol() -{ - MeshExtract extractor = {nullptr}; - extractor.init = extract_vcol_init; - extractor.init_subdiv = extract_vcol_init_subdiv; - extractor.data_type = MR_DATA_NONE; - extractor.data_size = 0; - extractor.use_threading = false; - extractor.mesh_buffer_offset = offsetof(MeshBufferList, vbo.vcol); - return extractor; -} - -/** \} */ - -} // namespace blender::draw - -const MeshExtract extract_vcol = blender::draw::create_extractor_vcol(); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc index c64cca4dff5..4db5a8c23a4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc @@ -8,6 +8,7 @@ #include "MEM_guardedalloc.h" #include "BKE_deform.h" +#include "BKE_mesh.h" #include "draw_subdivision.h" #include "extract_mesh.hh" @@ -79,7 +80,7 @@ static float evaluate_vertex_weight(const MDeformVert *dvert, const DRW_MeshWeig } static void extract_weights_init(const MeshRenderData *mr, - struct 
MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *tls_data) { @@ -105,7 +106,7 @@ static void extract_weights_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->vdata, CD_MDEFORMVERT); } else { - data->dvert = (const MDeformVert *)CustomData_get_layer(&mr->me->vdata, CD_MDEFORMVERT); + data->dvert = mr->me->deform_verts().data(); data->cd_ofs = -1; } } @@ -154,7 +155,7 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr, static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *_data) { @@ -171,8 +172,9 @@ static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, extract_weights_init(mr, cache, coarse_weights, _data); if (mr->extract_type != MR_EXTRACT_BMESH) { - for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + for (const int i : coarse_polys.index_range()) { + const MPoly *mpoly = &coarse_polys[i]; extract_weights_iter_poly_mesh(mr, mpoly, i, _data); } } diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl new file mode 100644 index 00000000000..b5f664a6779 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -0,0 +1,59 @@ + +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Axis Aligned Bound Box + * \{ */ + +struct AABB { + vec3 min, max; +}; + +AABB aabb_init_min_max() +{ + AABB aabb; + aabb.min = vec3(1.0e30); + aabb.max = vec3(-1.0e30); + return aabb; +} + +void aabb_merge(inout AABB aabb, vec3 v) +{ + aabb.min = min(aabb.min, v); + aabb.max = max(aabb.max, v); +} + +/** + * Return true if there is any intersection. 
+ */ +bool aabb_intersect(AABB a, AABB b) +{ + return all(greaterThanEqual(min(a.max, b.max), max(a.min, b.min))); +} + +/** + * Compute intersect intersection volume of \a a and \a b. + * Return true if the resulting volume is not empty. + */ +bool aabb_clip(AABB a, AABB b, out AABB c) +{ + c.min = max(a.min, b.min); + c.max = min(a.max, b.max); + return all(greaterThanEqual(c.max, c.min)); +} + +Box aabb_to_box(AABB aabb) +{ + Box box; + box.corners[0] = aabb.min; + box.corners[1] = vec3(aabb.max.x, aabb.min.y, aabb.min.z); + box.corners[2] = vec3(aabb.max.x, aabb.max.y, aabb.min.z); + box.corners[3] = vec3(aabb.min.x, aabb.max.y, aabb.min.z); + box.corners[4] = vec3(aabb.min.x, aabb.min.y, aabb.max.z); + box.corners[5] = vec3(aabb.max.x, aabb.min.y, aabb.max.z); + box.corners[6] = aabb.max; + box.corners[7] = vec3(aabb.min.x, aabb.max.y, aabb.max.z); + return box; +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_attribute_lib.glsl b/source/blender/draw/intern/shaders/common_attribute_lib.glsl index ce5e49c7f63..6b5b6fcc846 100644 --- a/source/blender/draw/intern/shaders/common_attribute_lib.glsl +++ b/source/blender/draw/intern/shaders/common_attribute_lib.glsl @@ -25,3 +25,4 @@ float attr_load_float(sampler3D tex); float attr_load_temperature_post(float attr); vec4 attr_load_color_post(vec4 attr); +vec4 attr_load_uniform(vec4 attr, const uint attr_hash); diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl new file mode 100644 index 00000000000..3287897e73c --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -0,0 +1,215 @@ + +/** + * Debugging drawing library + * + * Quick way to draw debug geometry. All input should be in world space and + * will be rendered in the default view. No additional setup required. + **/ + +/** Global switch option. 
*/ +bool drw_debug_draw_enable = true; +const vec4 drw_debug_default_color = vec4(1.0, 0.0, 0.0, 1.0); + +/* -------------------------------------------------------------------- */ +/** \name Internals + * \{ */ + +uint drw_debug_start_draw(uint v_needed) +{ + uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed); + vertid += drw_debug_draw_offset; + return vertid; +} + +uint drw_debug_color_pack(vec4 color) +{ + color = clamp(color, 0.0, 1.0); + uint result = 0; + result |= uint(color.x * 255.0) << 0u; + result |= uint(color.y * 255.0) << 8u; + result |= uint(color.z * 255.0) << 16u; + result |= uint(color.w * 255.0) << 24u; + return result; +} + +void drw_debug_line(inout uint vertid, vec3 v1, vec3 v2, uint color) +{ + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v1.x), floatBitsToUint(v1.y), floatBitsToUint(v1.z), color); + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v2.x), floatBitsToUint(v2.y), floatBitsToUint(v2.z), color); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name API + * \{ */ + +/** + * Draw a line. + */ +void drw_debug_line(vec3 v1, vec3 v2, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + drw_debug_line(vertid, v1, v2, drw_debug_color_pack(color)); + } +} +void drw_debug_line(vec3 v1, vec3 v2) +{ + drw_debug_line(v1, v2, drw_debug_default_color); +} + +/** + * Draw a quad contour. 
+ */ +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 8; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + } +} +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4) +{ + drw_debug_quad(v1, v2, v3, v4, drw_debug_default_color); +} + +/** + * Draw a point as octahedron wireframe. + */ +void drw_debug_point(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + vec3 c = vec3(radius, -radius, 0); + vec3 v1 = p + c.xzz; + vec3 v2 = p + c.zxz; + vec3 v3 = p + c.yzz; + vec3 v4 = p + c.zyz; + vec3 v5 = p + c.zzx; + vec3 v6 = p + c.zzy; + + const uint v_needed = 12 * 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + drw_debug_line(vertid, v1, v5, pcolor); + drw_debug_line(vertid, v2, v5, pcolor); + drw_debug_line(vertid, v3, v5, pcolor); + drw_debug_line(vertid, v4, v5, pcolor); + drw_debug_line(vertid, v1, v6, pcolor); + drw_debug_line(vertid, v2, v6, pcolor); + drw_debug_line(vertid, v3, v6, pcolor); + drw_debug_line(vertid, v4, v6, pcolor); + } +} +void drw_debug_point(vec3 p, float radius) +{ + drw_debug_point(p, radius, drw_debug_default_color); +} +void drw_debug_point(vec3 p) +{ + drw_debug_point(p, 0.01); +} + +/** + * Draw a sphere wireframe as 3 axes circle. 
+ */ +void drw_debug_sphere(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const int circle_resolution = 16; + const uint v_needed = circle_resolution * 2 * 3; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + for (int axis = 0; axis < 3; axis++) { + for (int edge = 0; edge < circle_resolution; edge++) { + float angle1 = (2.0 * 3.141592) * float(edge + 0) / float(circle_resolution); + vec3 p1 = vec3(cos(angle1), sin(angle1), 0.0) * radius; + p1 = vec3(p1[(0 + axis) % 3], p1[(1 + axis) % 3], p1[(2 + axis) % 3]); + + float angle2 = (2.0 * 3.141592) * float(edge + 1) / float(circle_resolution); + vec3 p2 = vec3(cos(angle2), sin(angle2), 0.0) * radius; + p2 = vec3(p2[(0 + axis) % 3], p2[(1 + axis) % 3], p2[(2 + axis) % 3]); + + drw_debug_line(vertid, p + p1, p + p2, pcolor); + } + } + } +} +void drw_debug_sphere(vec3 p, float radius) +{ + drw_debug_sphere(p, radius, drw_debug_default_color); +} + +/** + * Draw a matrix transformation as 3 colored axes. + */ +void drw_debug_matrix(mat4 mat, vec4 color) +{ + vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1)); + for (int i = 0; i < 4; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_line(p[0].xyz, p[0].xyz, vec4(1, 0, 0, 1)); + drw_debug_line(p[0].xyz, p[1].xyz, vec4(0, 1, 0, 1)); + drw_debug_line(p[0].xyz, p[2].xyz, vec4(0, 0, 1, 1)); +} +void drw_debug_matrix(mat4 mat) +{ + drw_debug_matrix(mat, drw_debug_default_color); +} + +/** + * Draw a matrix as a 2 units length bounding box, centered on origin. 
+ */ +void drw_debug_matrix_as_bbox(mat4 mat, vec4 color) +{ + vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1), + vec4(1, -1, -1, 1), + vec4(1, 1, -1, 1), + vec4(-1, 1, -1, 1), + vec4(-1, -1, 1, 1), + vec4(1, -1, 1, 1), + vec4(1, 1, 1, 1), + vec4(-1, 1, 1, 1)); + for (int i = 0; i < 8; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_quad(p[0].xyz, p[1].xyz, p[2].xyz, p[3].xyz, color); + drw_debug_line(p[0].xyz, p[4].xyz, color); + drw_debug_line(p[1].xyz, p[5].xyz, color); + drw_debug_line(p[2].xyz, p[6].xyz, color); + drw_debug_line(p[3].xyz, p[7].xyz, color); + drw_debug_quad(p[4].xyz, p[5].xyz, p[6].xyz, p[7].xyz, color); +} +void drw_debug_matrix_as_bbox(mat4 mat) +{ + drw_debug_matrix_as_bbox(mat, drw_debug_default_color); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl new file mode 100644 index 00000000000..89d1729b52d --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -0,0 +1,388 @@ + +/** + * Debug print implementation for shaders. + * + * `print()`: + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + * `drw_print_no_endl()`: + * Same as `print()` but does not finish the line. + * `drw_print_value()`: + * Display only the value of a variable. Does not finish the line. + * `drw_print_value_hex()`: + * Display only the hex representation of a variable. Does not finish the line. + * `drw_print_value_binary()`: Display only the binary representation of a + * variable. Does not finish the line. + * + * IMPORTANT: As it is now, it is not yet thread safe. Only print from one thread. You can use the + * IS_DEBUG_MOUSE_FRAGMENT macro in fragment shader to filter using mouse position or + * IS_FIRST_INVOCATION in compute shaders. 
+ * + * NOTE: Floating point representation might not be very precise (see drw_print_value(float)). + * + * IMPORTANT: Multipler drawcalls can write to the buffer in sequence (if they are from different + * shgroups). However, we add barriers to support this case and it might change the application + * behavior. Uncomment DISABLE_DEBUG_SHADER_drw_print_BARRIER to remove the barriers if that + * happens. But then you are limited to a single invocation output. + * + * IMPORTANT: All of these are copied to the CPU debug libs (draw_debug.cc). They need to be kept + * in sync to write the same data. + */ + +/** Global switch option when you want to silence all prints from all shaders at once. */ +bool drw_debug_print_enable = true; + +/* Set drw_print_col to max value so we will start by creating a new line and get the correct + * threadsafe row. */ +uint drw_print_col = DRW_DEBUG_PRINT_WORD_WRAP_COLUMN; +uint drw_print_row = 0u; + +void drw_print_newline() +{ + if (!drw_debug_print_enable) { + return; + } + drw_print_col = 0u; + drw_print_row = atomicAdd(drw_debug_print_row_shared, 1u) + 1u; +} + +void drw_print_string_start(uint len) +{ + if (!drw_debug_print_enable) { + return; + } + /* Break before word. */ + if (drw_print_col + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } +} + +void drw_print_char4(uint data) +{ + if (!drw_debug_print_enable) { + return; + } + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + uint cursor = atomicAdd(drw_debug_print_cursor, 1u); + cursor += drw_debug_print_offset; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = drw_print_col++; + uint drw_print_header = (flags << 24u) | (drw_print_row << 16u) | (col << 8u); + drw_debug_print_buf[cursor] = drw_print_header | char1; + /* Break word. 
*/ + if (drw_print_col > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } + } + } +} + +/** + * NOTE(fclem): Strange behavior emerge when trying to increment the digit + * counter inside the append function. It looks like the compiler does not see + * it is referenced as an index for char4 and thus do not capture the right + * reference. I do not know if this is undefined behavior. As a matter of + * precaution, we implement all the append function separately. This behavior + * was observed on both Mesa & amdgpu-pro. + */ +/* Using ascii char code. Expect char1 to be less or equal to 0xFF. Appends chars to the right. */ +void drw_print_append_char(uint char1, inout uint char4) +{ + char4 = (char4 << 8u) | char1; +} + +void drw_print_append_digit(uint digit, inout uint char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void drw_print_append_space(inout uint char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void drw_print_value_binary(uint value) +{ + drw_print_no_endl("0b"); + drw_print_string_start(10u * 4u); + uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u); + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_binary(int value) +{ + drw_print_value_binary(uint(value)); +} + +void drw_print_value_binary(float value) +{ + drw_print_value_binary(floatBitsToUint(value)); +} + +void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned) +{ + drw_print_string_start(3u * 4u); + const uint blank_value = hex ? 
0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = uint[3](blank_value, blank_value, prefix); + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + drw_print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. */ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_hex(uint value) +{ + drw_print_value_uint(value, true, false, false); +} + +void drw_print_value_hex(int value) +{ + drw_print_value_uint(uint(value), true, false, false); +} + +void drw_print_value_hex(float value) +{ + drw_print_value_uint(floatBitsToUint(value), true, false, false); +} + +void drw_print_value(uint value) +{ + drw_print_value_uint(value, false, false, true); +} + +void drw_print_value(int value) +{ + drw_print_value_uint(uint(abs(value)), false, (value < 0), false); +} + +void drw_print_value(bool value) +{ + if (value) { + drw_print_no_endl("true "); + } + else { + drw_print_no_endl("false"); + } +} + +/* NOTE(@fclem): This is homebrew and might not be 100% accurate (accuracy has + * not been tested and might dependent on compiler implementation). If unsure, + * use drw_print_value_hex and transcribe the value manually with another tool. */ +void drw_print_value(float val) +{ + /* We pad the string to match normal float values length. 
*/ + if (isnan(val)) { + drw_print_no_endl(" NaN"); + return; + } + if (isinf(val)) { + if (sign(val) < 0.0) { + drw_print_no_endl(" -Inf"); + } + else { + drw_print_no_endl(" Inf"); + } + return; + } + + /* Adjusted for significant digits (6) with sign (1), decimal separator (1) + * and exponent (4). */ + const float significant_digits = 6.0; + drw_print_string_start(3u * 4u); + uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u); + + float exponent = floor(log(abs(val)) / log(10.0)); + bool display_exponent = exponent >= (significant_digits) || + exponent <= (-significant_digits + 1.0); + + float int_significant_digits = min(exponent + 1.0, significant_digits); + float dec_significant_digits = max(0.0, significant_digits - int_significant_digits); + /* Power to get to the rounding point. */ + float rounding_power = dec_significant_digits; + + if (val == 0.0 || isinf(exponent)) { + display_exponent = false; + int_significant_digits = dec_significant_digits = 1.0; + } + /* Remap to keep significant numbers count. */ + if (display_exponent) { + int_significant_digits = 1.0; + dec_significant_digits = significant_digits - int_significant_digits; + rounding_power = -exponent + dec_significant_digits; + } + /* Round at the last significant digit. */ + val = round(val * pow(10.0, rounding_power)); + /* Get back to final exponent. */ + val *= pow(10.0, -dec_significant_digits); + + float int_part; + float dec_part = modf(val, int_part); + + dec_part *= pow(10.0, dec_significant_digits); + + const uint base = 10u; + uint digit = 0u; + /* Exponent */ + uint value = uint(abs(exponent)); + if (display_exponent) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Exponent sign. */ + uint sign_char = (exponent < 0.0) ? '-' : '+'; + drw_print_append_char(sign_char, digits[digit / 4u]); + digit++; + /* Exponent `e` suffix. 
*/ + drw_print_append_char(0x65u, digits[digit / 4u]); + digit++; + } + /* Decimal part. */ + value = uint(abs(dec_part)); +#if 0 /* We don't do that because it makes unstable values really hard to \ + read. */ + /* Trim trailing zeros. */ + while ((value % base) == 0u) { + value /= base; + if (value == 0u) { + break; + } + } +#endif + if (value != 0u) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Point separator. */ + drw_print_append_char('.', digits[digit / 4u]); + digit++; + } + /* Integer part. */ + value = uint(abs(int_part)); + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Negative sign. */ + if (val < 0.0) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. 
*/ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value(vec2 value) +{ + drw_print_no_endl("vec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(vec3 value) +{ + drw_print_no_endl("vec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(vec4 value) +{ + drw_print_no_endl("vec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(ivec2 value) +{ + drw_print_no_endl("ivec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(ivec3 value) +{ + drw_print_no_endl("ivec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(ivec4 value) +{ + drw_print_no_endl("ivec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(uvec2 value) +{ + drw_print_no_endl("uvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(uvec3 value) +{ + drw_print_no_endl("uvec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(uvec4 value) +{ + drw_print_no_endl("uvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(bvec2 value) +{ + drw_print_no_endl("bvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(bvec3 value) +{ + drw_print_no_endl("bvec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(bvec4 value) +{ + drw_print_no_endl("bvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} diff --git a/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl new file mode 100644 index 00000000000..538c55ce544 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl @@ -0,0 +1,57 @@ + +/** + * Debug drawing of shapes. 
+ */ + +#pragma BLENDER_REQUIRE(common_debug_draw_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +void drw_debug(Box shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Box shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Frustum shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Frustum shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Pyramid shape, vec4 color) +{ + drw_debug_line(shape.corners[0], shape.corners[1], color); + drw_debug_line(shape.corners[0], shape.corners[2], color); + drw_debug_line(shape.corners[0], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_quad(shape.corners[1], shape.corners[2], shape.corners[3], shape.corners[4], color); +} +void drw_debug(Pyramid shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Sphere shape, vec4 color) +{ + drw_debug_sphere(shape.center, shape.radius, color); +} +void drw_debug(Sphere shape) +{ + drw_debug(shape, drw_debug_default_color); +} diff --git a/source/blender/draw/intern/shaders/common_hair_lib.glsl 
b/source/blender/draw/intern/shaders/common_hair_lib.glsl index e235da91e8d..b82df4a51dc 100644 --- a/source/blender/draw/intern/shaders/common_hair_lib.glsl +++ b/source/blender/draw/intern/shaders/common_hair_lib.glsl @@ -164,16 +164,15 @@ float hair_shaperadius(float shape, float root, float tip, float time) in float dummy; # endif -void hair_get_pos_tan_binor_time(bool is_persp, - mat4 invmodel_mat, - vec3 camera_pos, - vec3 camera_z, - out vec3 wpos, - out vec3 wtan, - out vec3 wbinor, - out float time, - out float thickness, - out float thick_time) +void hair_get_center_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness) { int id = hair_get_base_id(); vec4 data = texelFetch(hairPointBuffer, id); @@ -202,15 +201,27 @@ void hair_get_pos_tan_binor_time(bool is_persp, wbinor = normalize(cross(camera_vec, wtan)); thickness = hair_shaperadius(hairRadShape, hairRadRoot, hairRadTip, time); +} +void hair_get_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness, + out float thick_time) +{ + hair_get_center_pos_tan_binor_time( + is_persp, invmodel_mat, camera_pos, camera_z, wpos, wtan, wbinor, time, thickness); if (hairThicknessRes > 1) { thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1); thick_time = thickness * (thick_time * 2.0 - 1.0); - /* Take object scale into account. * NOTE: This only works fine with uniform scaling. 
*/ float scale = 1.0 / length(mat3(invmodel_mat) * wbinor); - wpos += wbinor * thick_time * scale; } else { diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl new file mode 100644 index 00000000000..83223f89277 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -0,0 +1,466 @@ + +/** + * Intersection library used for culling. + * Results are meant to be conservative. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Plane extraction functions. + * \{ */ + +/** \a v1 and \a v2 are vectors on the plane. \a p is a point on the plane. */ +vec4 isect_plane_setup(vec3 p, vec3 v1, vec3 v2) +{ + vec3 normal_to_plane = normalize(cross(v1, v2)); + return vec4(normal_to_plane, -dot(normal_to_plane, p)); +} + +struct IsectPyramid { + vec3 corners[5]; + vec4 planes[5]; +}; + +IsectPyramid isect_data_setup(Pyramid shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A2 = shape.corners[2] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 S4 = shape.corners[4] - shape.corners[1]; + vec3 S2 = shape.corners[2] - shape.corners[1]; + + IsectPyramid data; + data.planes[0] = isect_plane_setup(shape.corners[0], A2, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A3, A2); + data.planes[2] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[3] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[4] = isect_plane_setup(shape.corners[1], S2, S4); + for (int i = 0; i < 5; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +struct IsectBox { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectBox isect_data_setup(Box shape) +{ + vec3 A1 = shape.corners[1] - 
shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + + IsectBox data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + /* Assumes that the box is actually a box! */ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, shape.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, shape.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, shape.corners[6])); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/* Construct box from 1 corner point + 3 side vectors. */ +IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z) +{ + IsectBox data; + data.corners[0] = origin; + data.corners[1] = origin + side_x; + data.corners[2] = origin + side_y + side_x; + data.corners[3] = origin + side_y; + data.corners[4] = data.corners[0] + side_z; + data.corners[5] = data.corners[1] + side_z; + data.corners[6] = data.corners[2] + side_z; + data.corners[7] = data.corners[3] + side_z; + + data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z); + data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y); + data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x); + /* Assumes that the box is actually a box! 
*/ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6])); + + return data; +} + +struct IsectFrustum { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectFrustum isect_data_setup(Frustum shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 B5 = shape.corners[5] - shape.corners[6]; + vec3 B7 = shape.corners[7] - shape.corners[6]; + vec3 B2 = shape.corners[2] - shape.corners[6]; + + IsectFrustum data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[3] = isect_plane_setup(shape.corners[6], B7, B5); + data.planes[4] = isect_plane_setup(shape.corners[6], B5, B2); + data.planes[5] = isect_plane_setup(shape.corners[6], B2, B7); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name View Intersection functions. + * \{ */ + +bool intersect_view(Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect_view(Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(IsectBox i_box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. 
*/ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(Sphere sphere) +{ + bool intersects = true; + + for (int p = 0; p < 6 && intersects; ++p) { + float dist_to_plane = dot(drw_view.frustum_planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + } + } + /* TODO reject false positive. */ + return intersects; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Shape vs. Shape Intersection functions. + * \{ */ + +bool intersect(IsectPyramid i_pyramid, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Pyramid planes. 
*/ + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Pyramid vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_box.planes[p], vec4(i_pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_frustum.planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_frustum.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. 
*/ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Sphere sphere) +{ + bool intersects = true; + for (int p = 0; p < 6; ++p) { + float dist_to_plane = dot(i_frustum.planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(Cone cone, Sphere sphere) +{ + /** + * Following "Improve Tile-based Light Culling with Spherical-sliced Cone" + * by Eric Zhang + * https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html + */ + float sphere_distance = length(sphere.center); + float sphere_distance_rcp = safe_rcp(sphere_distance); + float sphere_sin = saturate(sphere.radius * sphere_distance_rcp); + float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin); + float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos); + + float cone_sphere_center_cos = dot(sphere.center * sphere_distance_rcp, cone.direction); + /* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */ + float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ? + -1.0 : + (cone.angle_cos * sphere_cos - + cone_aperture_sin * sphere_sin); + /* Comparing cosines instead of angles since we are interested + * only in the monotonic region [0 .. M_PI / 2]. This saves costly acos() calls. 
*/ + bool intersects = (cone_sphere_center_cos >= cone_sphere_angle_sum_cos); + + return intersects; +} + +bool intersect(Circle circle_a, Circle circle_b) +{ + return distance_squared(circle_a.center, circle_b.center) < + sqr(circle_a.radius + circle_b.radius); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl index 6d4452c18c8..71460c39285 100644 --- a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl @@ -5,63 +5,88 @@ /** \name Math intersection & projection functions. * \{ */ -float point_plane_projection_dist(vec3 lineorigin, vec3 planeorigin, vec3 planenormal) +vec4 plane_from_quad(vec3 v0, vec3 v1, vec3 v2, vec3 v3) { - return dot(planenormal, planeorigin - lineorigin); + vec3 nor = normalize(cross(v2 - v1, v0 - v1) + cross(v0 - v3, v2 - v3)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, - vec3 linedirection, - vec3 planeorigin, - vec3 planenormal) +vec4 plane_from_tri(vec3 v0, vec3 v1, vec3 v2) { - return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection); + vec3 nor = normalize(cross(v2 - v1, v0 - v1)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec4 plane) +float point_plane_projection_dist(vec3 line_origin, vec3 plane_origin, vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin); +} + +float point_line_projection_dist(vec2 point, vec2 line_origin, vec2 line_normal) +{ + return dot(line_normal, line_origin - point); +} + +float line_plane_intersect_dist(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin) / dot(plane_normal, line_direction); +} + +float line_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec4 plane) { vec3 
plane_co = plane.xyz * (-plane.w / len_squared(plane.xyz)); - vec3 h = lineorigin - plane_co; - return -dot(plane.xyz, h) / dot(plane.xyz, linedirection); + vec3 h = line_origin - plane_co; + return -dot(plane.xyz, h) / dot(plane.xyz, line_direction); } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal) +vec3 line_plane_intersect(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, planeorigin, planenormal); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane_origin, plane_normal); + return line_origin + line_direction * dist; } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec4 plane) +vec3 line_plane_intersect(vec3 line_origin, vec3 line_direction, vec4 plane) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, plane); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane); + return line_origin + line_direction * dist; } -float line_aligned_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +float line_aligned_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { /* aligned plane normal */ - vec3 L = planeorigin - lineorigin; - float diskdist = length(L); - vec3 planenormal = -normalize(L); - return -diskdist / dot(planenormal, linedirection); + vec3 L = plane_origin - line_origin; + float disk_dist = length(L); + vec3 plane_normal = -normalize(L); + return -disk_dist / dot(plane_normal, line_direction); } -vec3 line_aligned_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +vec3 line_aligned_plane_intersect(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { - float dist = line_aligned_plane_intersect_dist(lineorigin, linedirection, planeorigin); + float dist = 
line_aligned_plane_intersect_dist(line_origin, line_direction, plane_origin); if (dist < 0) { /* if intersection is behind we fake the intersection to be * really far and (hopefully) not inside the radius of interest */ dist = 1e16; } - return lineorigin + linedirection * dist; + return line_origin + line_direction * dist; } -float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns intersection distance between the unit sphere and the line + * with the assumption that \a line_origin is contained in the unit sphere. + * It will always return the farthest intersection. + */ +float line_unit_sphere_intersect_dist(vec3 line_origin, vec3 line_direction) { - float a = dot(linedirection, linedirection); - float b = dot(linedirection, lineorigin); - float c = dot(lineorigin, lineorigin) - 1; + float a = dot(line_direction, line_direction); + float b = dot(line_direction, line_origin); + float c = dot(line_origin, line_origin) - 1; float dist = 1e15; float determinant = b * b - a * c; @@ -72,22 +97,63 @@ float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) return dist; } -float line_unit_box_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns intersection distance between the unit box and the line + * with the assumption that \a line_origin is contained in the unit box. + * In other words, it will always return the farthest intersection.
+ */ +float line_unit_box_intersect_dist(vec3 line_origin, vec3 line_direction) { /* https://seblagarde.wordpress.com/2012/09/29/image-based-lighting-approaches-and-parallax-corrected-cubemap/ */ - vec3 firstplane = (vec3(1.0) - lineorigin) / linedirection; - vec3 secondplane = (vec3(-1.0) - lineorigin) / linedirection; - vec3 furthestplane = max(firstplane, secondplane); + vec3 first_plane = (vec3(1.0) - line_origin) / line_direction; + vec3 second_plane = (vec3(-1.0) - line_origin) / line_direction; + vec3 farthest_plane = max(first_plane, second_plane); + + return min_v3(farthest_plane); +} + +float line_unit_box_intersect_dist_safe(vec3 line_origin, vec3 line_direction) +{ + vec3 safe_line_direction = max(vec3(1e-8), abs(line_direction)) * + select(vec3(1.0), -vec3(1.0), lessThan(line_direction, vec3(0.0))); + return line_unit_box_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Same as line_unit_box_intersect_dist but for 2D case. + */ +float line_unit_square_intersect_dist(vec2 line_origin, vec2 line_direction) +{ + vec2 first_plane = (vec2(1.0) - line_origin) / line_direction; + vec2 second_plane = (vec2(-1.0) - line_origin) / line_direction; + vec2 farthest_plane = max(first_plane, second_plane); - return min_v3(furthestplane); + return min_v2(farthest_plane); } -float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) +float line_unit_square_intersect_dist_safe(vec2 line_origin, vec2 line_direction) { - vec3 safe_linedirection = max(vec3(1e-8), abs(linedirection)) * - select(vec3(1.0), -vec3(1.0), lessThan(linedirection, vec3(0.0))); - return line_unit_box_intersect_dist(lineorigin, safe_linedirection); + vec2 safe_line_direction = max(vec2(1e-8), abs(line_direction)) * + select(vec2(1.0), -vec2(1.0), lessThan(line_direction, vec2(0.0))); + return line_unit_square_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Returns clipping distance (intersection with the nearest plane) with the given axis-aligned + * 
bound box along \a line_direction. + * Safe even if \a line_direction is degenerate. + * It assumes that an intersection exists (i.e: that \a line_direction points towards the AABB). + */ +float line_aabb_clipping_dist(vec3 line_origin, vec3 line_direction, vec3 aabb_min, vec3 aabb_max) +{ + vec3 safe_dir = select(line_direction, vec3(1e-5), lessThan(abs(line_direction), vec3(1e-5))); + vec3 dir_inv = 1.0 / safe_dir; + + vec3 first_plane = (aabb_min - line_origin) * dir_inv; + vec3 second_plane = (aabb_max - line_origin) * dir_inv; + vec3 nearest_plane = min(first_plane, second_plane); + return max_v3(nearest_plane); } /** \} */ @@ -98,8 +164,8 @@ float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) void make_orthonormal_basis(vec3 N, out vec3 T, out vec3 B) { - vec3 UpVector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); - T = normalize(cross(UpVector, N)); + vec3 up_vector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + T = normalize(cross(up_vector, N)); B = cross(N, T); } diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl index 51f3c890df8..5842df424be 100644 --- a/source/blender/draw/intern/shaders/common_math_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_lib.glsl @@ -17,6 +17,7 @@ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ #define FLT_MAX 3.402823e+38 +#define FLT_MIN 1.175494e-38 vec3 mul(mat3 m, vec3 v) { @@ -116,8 +117,8 @@ bool flag_test(int flag, int val) { return (flag & val) != 0; } void set_flag_from_test(inout uint value, bool test, uint flag) { if (test) { value |= flag; } else { value &= ~flag; } } void set_flag_from_test(inout int value, bool test, int flag) { if (test) { value |= flag; } else { value &= ~flag; } } -#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + 
val3 * weights[3]) * safe_rcp(sum(weights))); -#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))); +#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + val3 * weights[3]) * safe_rcp(sum(weights))) +#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))) /* clang-format on */ @@ -130,12 +131,17 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu #define in_texture_range(texel, tex) \ (all(greaterThanEqual(texel, ivec2(0))) && all(lessThan(texel, textureSize(tex, 0).xy))) -uint divide_ceil_u(uint visible_count, uint divisor) +uint divide_ceil(uint visible_count, uint divisor) { return (visible_count + (divisor - 1u)) / divisor; } -int divide_ceil_i(int visible_count, int divisor) +int divide_ceil(int visible_count, int divisor) +{ + return (visible_count + (divisor - 1)) / divisor; +} + +ivec2 divide_ceil(ivec2 visible_count, ivec2 divisor) { return (visible_count + (divisor - 1)) / divisor; } diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl new file mode 100644 index 00000000000..f2c8bf0faaf --- /dev/null +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -0,0 +1,202 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) + +/** + * Geometric shape structures. + * Some constructors might seems redundant but are here to make the API cleaner and + * allow for more than one constructor per type. 
+ */ + +/* ---------------------------------------------------------------------- */ +/** \name Circle + * \{ */ + +struct Circle { + vec2 center; + float radius; +}; + +Circle shape_circle(vec2 center, float radius) +{ + return Circle(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Sphere + * \{ */ + +struct Sphere { + vec3 center; + float radius; +}; + +Sphere shape_sphere(vec3 center, float radius) +{ + return Sphere(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Box + * \{ */ + +struct Box { + vec3 corners[8]; +}; + +/* Construct box from 4 basis points. */ +Box shape_box(vec3 v000, vec3 v100, vec3 v010, vec3 v001) +{ + v100 -= v000; + v010 -= v000; + v001 -= v000; + Box box; + box.corners[0] = v000; + box.corners[1] = v000 + v100; + box.corners[2] = v000 + v010 + v100; + box.corners[3] = v000 + v010; + box.corners[4] = box.corners[0] + v001; + box.corners[5] = box.corners[1] + v001; + box.corners[6] = box.corners[2] + v001; + box.corners[7] = box.corners[3] + v001; + return box; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Square Pyramid + * \{ */ + +struct Pyramid { + /* Apex is the first. Base vertices are in clockwise order from front view. */ + vec3 corners[5]; +}; + +/** + * Regular Square Pyramid (can be oblique). + * Use this corner order. 
+ * (Top-Down View of the pyramid) + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----------3 + * | \ / | + * | \ / | + * | 0 | + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_corner_00 is vertex 1 + * base_corner_01 is vertex 2 + * base_corner_10 is vertex 4 + */ +Pyramid shape_pyramid(vec3 apex, vec3 base_corner_00, vec3 base_corner_01, vec3 base_corner_10) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_corner_00; + pyramid.corners[2] = base_corner_01; + pyramid.corners[3] = base_corner_10 + (base_corner_01 - base_corner_00); + pyramid.corners[4] = base_corner_10; + return pyramid; +} + +/** + * Regular Square Pyramid. + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----Y-----3 + * | \ | / | + * | \ | / | + * | 0-----X + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_center_pos_x is vector from base center to X + * base_center_pos_y is vector from base center to Y + */ +Pyramid shape_pyramid_non_oblique(vec3 apex, + vec3 base_center, + vec3 base_center_pos_x, + vec3 base_center_pos_y) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_center - base_center_pos_x - base_center_pos_y; + pyramid.corners[2] = base_center + base_center_pos_x - base_center_pos_y; + pyramid.corners[3] = base_center + base_center_pos_x + base_center_pos_y; + pyramid.corners[4] = base_center - base_center_pos_x + base_center_pos_y; + return pyramid; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Frustum + * \{ */ + +struct Frustum { + vec3 corners[8]; +}; + +/** + * Use this corner order. 
+ * <pre> + * + * Z Y + * | / + * |/ + * .-----X + * 2----------6 + * /| /| + * / | / | + * 1----------5 | + * | | | | + * | 3-------|--7 + * | / | / + * |/ |/ + * 0----------4 + * </pre> + */ +Frustum shape_frustum(vec3 corners[8]) +{ + Frustum frustum; + for (int i = 0; i < 8; i++) { + frustum.corners[i] = corners[i]; + } + return frustum; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Cone + * \{ */ + +/* Cone at origin with no height. */ +struct Cone { + vec3 direction; + float angle_cos; +}; + +Cone shape_cone(vec3 direction, float angle_cosine) +{ + return Cone(direction, angle_cosine); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl index 3244b7960d8..eacdf8e6333 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl @@ -35,7 +35,7 @@ void emit_line(uint line_offset, uint quad_index, uint start_loop_index, uint co uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count); - if (is_face_hidden(coarse_quad_index) || + if (use_hide && is_face_hidden(coarse_quad_index) || (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display)) { output_lines[line_offset + 0] = 0xffffffff; output_lines[line_offset + 1] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl index ce3c8478d3f..a46d69eca88 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -45,7 +45,7 @@ void main() int triangle_loop_index = (int(quad_index) + mat_offset) * 6; #endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && 
is_face_hidden(coarse_quad_index)) { output_tris[triangle_loop_index + 0] = 0xffffffff; output_tris[triangle_loop_index + 1] = 0xffffffff; output_tris[triangle_loop_index + 2] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl index d76a7369f79..4183b4a1cd3 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl @@ -36,6 +36,10 @@ layout(std140) uniform shader_data /* Total number of elements to process. */ uint total_dispatch_size; + + bool is_edit_mode; + + bool use_hide; }; uint get_global_invocation_index() diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl index e146ccb343a..81e346863c2 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl @@ -427,7 +427,7 @@ void main() output_nors[coarse_quad_index] = fnor; # endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && is_face_hidden(coarse_quad_index)) { output_indices[coarse_quad_index] = 0xffffffff; } else { diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl index f5c4c7895aa..97c07704c06 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl @@ -26,6 +26,23 @@ bool is_face_selected(uint coarse_quad_index) return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0; } +bool is_face_hidden(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_hidden_mask) != 0; +} + +/* Flag for paint mode overlay and normals drawing in edit-mode. 
*/ +float get_loop_flag(uint coarse_quad_index, int vert_origindex) +{ + if (is_face_hidden(coarse_quad_index) || (is_edit_mode && vert_origindex == -1)) { + return -1.0; + } + if (is_face_selected(coarse_quad_index)) { + return 1.0; + } + return 0.0; +} + void main() { /* We execute for each quad. */ @@ -44,7 +61,11 @@ void main() /* Face is smooth, use vertex normals. */ for (int i = 0; i < 4; i++) { PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i]; - output_lnor[start_loop_index + i] = get_normal_and_flag(pos_nor_loop); + int origindex = input_vert_origindex[start_loop_index + i]; + LoopNormal loop_normal = get_normal_and_flag(pos_nor_loop); + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); + + output_lnor[start_loop_index + i] = loop_normal; } } else { @@ -68,11 +89,7 @@ void main() for (int i = 0; i < 4; i++) { int origindex = input_vert_origindex[start_loop_index + i]; - float flag = 0.0; - if (origindex == -1) { - flag = -1.0; - } - loop_normal.flag = flag; + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); output_lnor[start_loop_index + i] = loop_normal; } diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 8eecaa46b58..6521476c3a7 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -37,6 +37,9 @@ layout(std140) uniform viewBlock # endif #endif +#define IS_DEBUG_MOUSE_FRAGMENT (ivec2(gl_FragCoord) == drw_view.mouse_pixel) +#define IS_FIRST_INVOCATION (gl_GlobalInvocationID == uvec3(0)) + #define ViewNear (ViewVecs[0].w) #define ViewFar (ViewVecs[1].w) @@ -152,7 +155,11 @@ uniform int drw_ResourceID; # define PASS_RESOURCE_ID # elif defined(GPU_VERTEX_SHADER) -# define resource_id gpu_InstanceIndex +# if defined(UNIFORM_RESOURCE_ID_NEW) +# define resource_id drw_ResourceID +# else +# define resource_id gpu_InstanceIndex +# endif # define PASS_RESOURCE_ID 
drw_ResourceID_iface.resource_index = resource_id; # elif defined(GPU_GEOMETRY_SHADER) @@ -200,8 +207,8 @@ flat in int resourceIDFrag; # ifndef DRW_SHADER_SHARED_H struct ObjectMatrices { - mat4 drw_modelMatrix; - mat4 drw_modelMatrixInverse; + mat4 model; + mat4 model_inverse; }; # endif /* DRW_SHADER_SHARED_H */ @@ -211,8 +218,8 @@ layout(std140) uniform modelBlock ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN]; }; -# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix) -# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse) +# define ModelMatrix (drw_matrices[resource_id].model) +# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse) # endif /* USE_GPU_SHADER_CREATE_INFO */ #else /* GPU_INTEL */ diff --git a/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl new file mode 100644 index 00000000000..3e640540777 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl @@ -0,0 +1,84 @@ + +/** + * Convert DrawPrototype into draw commands. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val) + +/* This is only called by the last thread executed over the group's prototype draws. */ +void write_draw_call(DrawGroup group, uint group_id) +{ + DrawCommand cmd; + cmd.vertex_len = group.vertex_len; + cmd.vertex_first = group.vertex_first; + if (group.base_index != -1) { + cmd.base_index = group.base_index; + cmd.instance_first_indexed = group.start; + } + else { + cmd._instance_first_array = group.start; + } + /* Back-facing command. */ + cmd.instance_len = group_buf[group_id].back_facing_counter; + command_buf[group_id * 2 + 0] = cmd; + /* Front-facing command. */ + cmd.instance_len = group_buf[group_id].front_facing_counter; + command_buf[group_id * 2 + 1] = cmd; + + /* Reset the counters for a next command gen dispatch. 
Avoids resending the whole data just + * for this purpose. Only the last thread will execute this so it is thread-safe. */ + group_buf[group_id].front_facing_counter = 0u; + group_buf[group_id].back_facing_counter = 0u; + group_buf[group_id].total_counter = 0u; +} + +void main() +{ + uint proto_id = gl_GlobalInvocationID.x; + if (proto_id >= prototype_len) { + return; + } + + DrawPrototype proto = prototype_buf[proto_id]; + uint group_id = proto.group_id; + bool is_inverted = (proto.resource_handle & 0x80000000u) != 0; + uint resource_index = (proto.resource_handle & 0x7FFFFFFFu); + + /* Visibility test result. */ + bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0; + + DrawGroup group = group_buf[group_id]; + + if (!is_visible) { + /* Skip the draw but still count towards the completion. */ + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + return; + } + + uint back_facing_len = group.len - group.front_facing_len; + uint front_facing_len = group.front_facing_len; + uint dst_index = group.start; + if (is_inverted) { + uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len); + dst_index += offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + else { + uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len); + dst_index += back_facing_len + offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + + for (uint i = dst_index; i < dst_index + proto.instance_len; i++) { + /* Fill resource_id buffer for each instance of this draw */ + resource_id_buf[i] = resource_index; + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl 
b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl new file mode 100644 index 00000000000..3fc5294b024 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl @@ -0,0 +1,9 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + out_color = interp.color; +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl new file mode 100644 index 00000000000..4061dda5d1c --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl @@ -0,0 +1,15 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + /* Skip the first vertex containing header data. */ + DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2]; + vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2)); + vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0; + + interp.color = col; + gl_Position = persmat * vec4(pos, 1.0); +} diff --git a/source/blender/draw/intern/shaders/draw_debug_info.hh b/source/blender/draw/intern/shaders/draw_debug_info.hh new file mode 100644 index 00000000000..ce450bb1210 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_info.hh @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "draw_defines.h" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * + * Allows print() function to have logging support inside shaders. 
+ * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_print) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index"); + +GPU_SHADER_CREATE_INFO(draw_debug_print_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(7, Qualifier::READ, "uint", "drw_debug_print_buf[]") + .vertex_out(draw_debug_print_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .vertex_source("draw_debug_print_display_vert.glsl") + .fragment_source("draw_debug_print_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * + * Allows to draw lines and points just like the DRW_debug module functions. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_draw) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_DRAW_SLOT, + Qualifier::READ_WRITE, + "DRWDebugVert", + "drw_debug_verts_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color"); + +GPU_SHADER_CREATE_INFO(draw_debug_draw_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(6, Qualifier::READ, "DRWDebugVert", "drw_debug_verts_buf[]") + .vertex_out(draw_debug_draw_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::MAT4, "persmat") + .vertex_source("draw_debug_draw_display_vert.glsl") + .fragment_source("draw_debug_draw_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl new file mode 100644 index 00000000000..4e0d980637f --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl 
@@ -0,0 +1,133 @@ + +/** + * Display characters using an ascii table. + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +bool char_intersect(uvec2 bitmap_position) +{ + /* Using 8x8 = 64bits = uvec2. */ + uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u), + uvec2(0x18001800u, 0x183c3c18u), + uvec2(0x00000000u, 0x36360000u), + uvec2(0x7f363600u, 0x36367f36u), + uvec2(0x301f0c00u, 0x0c3e031eu), + uvec2(0x0c666300u, 0x00633318u), + uvec2(0x3b336e00u, 0x1c361c6eu), + uvec2(0x00000000u, 0x06060300u), + uvec2(0x060c1800u, 0x180c0606u), + uvec2(0x180c0600u, 0x060c1818u), + uvec2(0x3c660000u, 0x00663cffu), + uvec2(0x0c0c0000u, 0x000c0c3fu), + uvec2(0x000c0c06u, 0x00000000u), + uvec2(0x00000000u, 0x0000003fu), + uvec2(0x000c0c00u, 0x00000000u), + uvec2(0x06030100u, 0x6030180cu), + uvec2(0x6f673e00u, 0x3e63737bu), + uvec2(0x0c0c3f00u, 0x0c0e0c0cu), + uvec2(0x06333f00u, 0x1e33301cu), + uvec2(0x30331e00u, 0x1e33301cu), + uvec2(0x7f307800u, 0x383c3633u), + uvec2(0x30331e00u, 0x3f031f30u), + uvec2(0x33331e00u, 0x1c06031fu), + uvec2(0x0c0c0c00u, 0x3f333018u), + uvec2(0x33331e00u, 0x1e33331eu), + uvec2(0x30180e00u, 0x1e33333eu), + uvec2(0x000c0c00u, 0x000c0c00u), + uvec2(0x000c0c06u, 0x000c0c00u), + uvec2(0x060c1800u, 0x180c0603u), + uvec2(0x003f0000u, 0x00003f00u), + uvec2(0x180c0600u, 0x060c1830u), + uvec2(0x0c000c00u, 0x1e333018u), + uvec2(0x7b031e00u, 0x3e637b7bu), + uvec2(0x3f333300u, 0x0c1e3333u), + uvec2(0x66663f00u, 0x3f66663eu), + uvec2(0x03663c00u, 0x3c660303u), + uvec2(0x66361f00u, 0x1f366666u), + uvec2(0x16467f00u, 0x7f46161eu), + uvec2(0x16060f00u, 0x7f46161eu), + uvec2(0x73667c00u, 0x3c660303u), + uvec2(0x33333300u, 0x3333333fu), + uvec2(0x0c0c1e00u, 0x1e0c0c0cu), + uvec2(0x33331e00u, 0x78303030u), + uvec2(0x36666700u, 0x6766361eu), + uvec2(0x46667f00u, 0x0f060606u), + uvec2(0x6b636300u, 0x63777f7fu), + uvec2(0x73636300u, 0x63676f7bu), + uvec2(0x63361c00u, 0x1c366363u), + uvec2(0x06060f00u, 0x3f66663eu), + uvec2(0x3b1e3800u, 0x1e333333u), + 
uvec2(0x36666700u, 0x3f66663eu), + uvec2(0x38331e00u, 0x1e33070eu), + uvec2(0x0c0c1e00u, 0x3f2d0c0cu), + uvec2(0x33333f00u, 0x33333333u), + uvec2(0x331e0c00u, 0x33333333u), + uvec2(0x7f776300u, 0x6363636bu), + uvec2(0x1c366300u, 0x6363361cu), + uvec2(0x0c0c1e00u, 0x3333331eu), + uvec2(0x4c667f00u, 0x7f633118u), + uvec2(0x06061e00u, 0x1e060606u), + uvec2(0x30604000u, 0x03060c18u), + uvec2(0x18181e00u, 0x1e181818u), + uvec2(0x00000000u, 0x081c3663u), + uvec2(0x000000ffu, 0x00000000u), + uvec2(0x00000000u, 0x0c0c1800u), + uvec2(0x3e336e00u, 0x00001e30u), + uvec2(0x66663b00u, 0x0706063eu), + uvec2(0x03331e00u, 0x00001e33u), + uvec2(0x33336e00u, 0x3830303eu), + uvec2(0x3f031e00u, 0x00001e33u), + uvec2(0x06060f00u, 0x1c36060fu), + uvec2(0x333e301fu, 0x00006e33u), + uvec2(0x66666700u, 0x0706366eu), + uvec2(0x0c0c1e00u, 0x0c000e0cu), + uvec2(0x3033331eu, 0x30003030u), + uvec2(0x1e366700u, 0x07066636u), + uvec2(0x0c0c1e00u, 0x0e0c0c0cu), + uvec2(0x7f6b6300u, 0x0000337fu), + uvec2(0x33333300u, 0x00001f33u), + uvec2(0x33331e00u, 0x00001e33u), + uvec2(0x663e060fu, 0x00003b66u), + uvec2(0x333e3078u, 0x00006e33u), + uvec2(0x66060f00u, 0x00003b6eu), + uvec2(0x1e301f00u, 0x00003e03u), + uvec2(0x0c2c1800u, 0x080c3e0cu), + uvec2(0x33336e00u, 0x00003333u), + uvec2(0x331e0c00u, 0x00003333u), + uvec2(0x7f7f3600u, 0x0000636bu), + uvec2(0x1c366300u, 0x00006336u), + uvec2(0x333e301fu, 0x00003333u), + uvec2(0x0c263f00u, 0x00003f19u), + uvec2(0x0c0c3800u, 0x380c0c07u), + uvec2(0x18181800u, 0x18181800u), + uvec2(0x0c0c0700u, 0x070c0c38u), + uvec2(0x00000000u, 0x6e3b0000u), + uvec2(0x00000000u, 0x00000000u)); + + if (!in_range_inclusive(bitmap_position, uvec2(0), uvec2(7))) { + return false; + } + uint char_bits = ascii_bitmap[char_index][bitmap_position.y >> 2u & 1u]; + char_bits = (char_bits >> ((bitmap_position.y & 3u) * 8u + bitmap_position.x)); + return (char_bits & 1u) != 0u; +} + +void main() +{ + uvec2 bitmap_position = uvec2(gl_PointCoord.xy * 8.0); + /* Point coord start from top 
left corner. But layout is from bottom to top. */ + bitmap_position.y = 7 - bitmap_position.y; + + if (char_intersect(bitmap_position)) { + out_color = vec4(1); + } + else if (char_intersect(bitmap_position + uvec2(0, 1))) { + /* Shadow */ + out_color = vec4(0, 0, 0, 1); + } + else { + /* Transparent Background for ease of read. */ + out_color = vec4(0, 0, 0, 0.2); + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl new file mode 100644 index 00000000000..cb379056e2b --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl @@ -0,0 +1,29 @@ + +/** + * Display characters using an ascii table. Outputs one point per character. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +void main() +{ + /* Skip first 4 chars containing header data. */ + uint char_data = drw_debug_print_buf[gl_VertexID + 8]; + char_index = (char_data & 0xFFu) - 0x20u; + + /* Discard invalid chars. */ + if (char_index >= 96u) { + gl_Position = vec4(-1); + gl_PointSize = 0.0; + return; + } + uint row = (char_data >> 16u) & 0xFFu; + uint col = (char_data >> 8u) & 0xFFu; + + float char_size = 16.0; + /* Change anchor point to the top left. 
*/ + vec2 pos_on_screen = char_size * vec2(col, row) + char_size * 4; + gl_Position = vec4( + pos_on_screen * drw_view.viewport_size_inverse * vec2(2.0, -2.0) - vec2(1.0, -1.0), 0, 1); + gl_PointSize = char_size; +} diff --git a/source/blender/draw/intern/shaders/draw_object_infos_info.hh b/source/blender/draw/intern/shaders/draw_object_infos_info.hh index 8fd55ea351f..31fee018fbc 100644 --- a/source/blender/draw/intern/shaders/draw_object_infos_info.hh +++ b/source/blender/draw/intern/shaders/draw_object_infos_info.hh @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(draw_object_infos) .typedef_source("draw_shader_shared.h") .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") .uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH); GPU_SHADER_CREATE_INFO(draw_volume_infos) @@ -14,3 +18,19 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos) GPU_SHADER_CREATE_INFO(draw_curves_infos) .typedef_source("draw_shader_shared.h") .uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH); + +GPU_SHADER_CREATE_INFO(draw_object_infos_new) + .typedef_source("draw_shader_shared.h") + .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") + .storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]"); + +/** \note Requires draw_object_infos_new. 
*/ +GPU_SHADER_CREATE_INFO(draw_object_attribute_new) + .define("OBATTR_LIB") + .define("ObjectAttributeStart", "(drw_infos[resource_id].orco_mul_bias[0].w)") + .define("ObjectAttributeLen", "(drw_infos[resource_id].orco_mul_bias[1].w)") + .storage_buf(DRW_OBJ_ATTR_SLOT, Qualifier::READ, "ObjectAttribute", "drw_attrs[]") + .additional_info("draw_object_infos_new"); diff --git a/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl new file mode 100644 index 00000000000..511d4e49651 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Finish computation of a few draw resource after sync. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +void main() +{ + uint resource_id = gl_GlobalInvocationID.x; + if (resource_id >= resource_len) { + return; + } + + mat4 model_mat = matrix_buf[resource_id].model; + ObjectInfos infos = infos_buf[resource_id]; + ObjectBounds bounds = bounds_buf[resource_id]; + + if (bounds.bounding_sphere.w != -1.0) { + /* Convert corners to origin + sides in world space. */ + vec3 p0 = bounds.bounding_corners[0].xyz; + vec3 p01 = bounds.bounding_corners[1].xyz - p0; + vec3 p02 = bounds.bounding_corners[2].xyz - p0; + vec3 p03 = bounds.bounding_corners[3].xyz - p0; + /* Avoid flat box. */ + p01.x = max(p01.x, 1e-4); + p02.y = max(p02.y, 1e-4); + p03.z = max(p03.z, 1e-4); + vec3 diagonal = p01 + p02 + p03; + vec3 center = p0 + diagonal * 0.5; + float min_axis = min_v3(abs(diagonal)); + bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center); + /* We have to apply scaling to the diagonal. 
*/ + bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) * + 0.5; + bounds_buf[resource_id]._inner_sphere_radius = min_axis; + bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0); + bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01); + bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02); + bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03); + /* Always have correct handedness in the corners vectors. */ + if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) { + bounds_buf[resource_id].bounding_corners[0].xyz += + bounds_buf[resource_id].bounding_corners[1].xyz; + bounds_buf[resource_id].bounding_corners[1].xyz = + -bounds_buf[resource_id].bounding_corners[1].xyz; + } + + /* TODO: Bypass test for very large objects (see T67319). */ + if (bounds_buf[resource_id].bounding_sphere.w > 1e12) { + bounds_buf[resource_id].bounding_sphere.w = -1.0; + } + } + + vec3 loc = infos.orco_add; /* Box center. */ + vec3 size = infos.orco_mul; /* Box half-extent. */ + /* This is what the original computation looks like. + * Simplify to a nice MADD in shading code. 
*/ + // orco = (pos - loc) / size; + // orco = pos * (1.0 / size) + (-loc / size); + vec3 size_inv = safe_rcp(size); + infos_buf[resource_id].orco_add = -loc * size_inv; + infos_buf[resource_id].orco_mul = size_inv; +} diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh index 0400521c53d..c522c607791 100644 --- a/source/blender/draw/intern/shaders/draw_view_info.hh +++ b/source/blender/draw/intern/shaders/draw_view_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle) * \{ */ GPU_SHADER_CREATE_INFO(draw_view) - .uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS) + .uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS) .typedef_source("draw_shader_shared.h"); GPU_SHADER_CREATE_INFO(draw_modelmat) .uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH) - .define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)") - .define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)") + .define("ModelMatrix", "(drw_matrices[resource_id].model)") + .define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)") .additional_info("draw_view"); GPU_SHADER_CREATE_INFO(draw_modelmat_legacy) @@ -136,3 +137,77 @@ GPU_SHADER_CREATE_INFO(draw_gpencil) .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos"); /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internal Draw Manager usage + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_finalize) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .define("DRAW_FINALIZE_SHADER") + .local_group_size(DRW_FINALIZE_GROUP_SIZE) + .storage_buf(0, 
Qualifier::READ, "ObjectMatrices", "matrix_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_resource_finalize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(draw_visibility_compute) + .do_static_compilation(true) + .local_group_size(DRW_VISIBILITY_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_visibility_comp.glsl") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(draw_command_generate) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .typedef_source("draw_command_shared.hh") + .local_group_size(DRW_COMMAND_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]") + .storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]") + .storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]") + .storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]") + .push_constant(Type::INT, "prototype_len") + .compute_source("draw_command_generate_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Resource ID + * New implementation using gl_BaseInstance and storage buffers. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_id_new) + .define("UNIFORM_RESOURCE_ID_NEW") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]") + .define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]"); + +/** + * Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf. 
+ */ +GPU_SHADER_CREATE_INFO(draw_resource_id_fallback) + .define("UNIFORM_RESOURCE_ID_NEW") + .vertex_in(15, Type::INT, "drw_ResourceID"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Object Resources + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_modelmat_new) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]") + .define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse") + .define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model") + /* TODO For compatibility with old shaders. To be removed. */ + .define("ModelMatrixInverse", "drw_ModelMatrixInverse") + .define("ModelMatrix", "drw_ModelMatrix") + .additional_info("draw_resource_id_new"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl new file mode 100644 index 00000000000..86add2d1fe2 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Compute visibility of each resource bounds for a given view. + */ +/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. 
*/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +shared uint shared_result; + +void mask_visibility_bit() +{ + uint bit = 1u << gl_LocalInvocationID.x; + atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit); +} + +void main() +{ + if (gl_GlobalInvocationID.x >= resource_len) { + return; + } + + ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x]; + + if (bounds.bounding_sphere.w != -1.0) { + IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz, + bounds.bounding_corners[1].xyz, + bounds.bounding_corners[2].xyz, + bounds.bounding_corners[3].xyz); + Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + + if (intersect_view(inscribed_sphere) == true) { + /* Visible. */ + } + else if (intersect_view(bounding_sphere) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + else if (intersect_view(box) == false) { + /* Not visible. 
*/ + mask_visibility_bit(); + } + } +} diff --git a/source/blender/draw/tests/draw_pass_test.cc b/source/blender/draw/tests/draw_pass_test.cc new file mode 100644 index 00000000000..394ca8bd3cf --- /dev/null +++ b/source/blender/draw/tests/draw_pass_test.cc @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" +#include "draw_testing.hh" + +#include <bitset> + +namespace blender::draw { + +static void test_draw_pass_all_commands() +{ + Texture tex; + tex.ensure_2d(GPU_RGBA16, int2(1)); + + UniformBuffer<uint4> ubo; + ubo.push_update(); + + StorageBuffer<uint4> ssbo; + ssbo.push_update(); + + float4 color(1.0f, 1.0f, 1.0f, 0.0f); + int3 dispatch_size(1); + + PassSimple pass = {"test.all_commands"}; + pass.init(); + pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL); + pass.clear_color_depth_stencil(float4(0.25f, 0.5f, 100.0f, -2000.0f), 0.5f, 0xF0); + pass.state_stencil(0x80, 0x0F, 0x8F); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.bind_texture("image", tex); + pass.bind_texture("image", &tex); + pass.bind_image("missing_image", tex); /* Should not crash. */ + pass.bind_image("missing_image", &tex); /* Should not crash. */ + pass.bind_ubo("missing_ubo", ubo); /* Should not crash. */ + pass.bind_ubo("missing_ubo", &ubo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", ssbo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", &ssbo); /* Should not crash. */ + pass.push_constant("color", color); + pass.push_constant("color", &color); + pass.push_constant("ModelViewProjectionMatrix", float4x4::identity()); + pass.draw_procedural(GPU_PRIM_TRIS, 1, 3); + + /* Should not crash even if shader is not a compute. This is because we only serialize. */ + /* TODO(fclem): Use real compute shader. 
*/ + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.dispatch(dispatch_size); + pass.dispatch(&dispatch_size); + pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + + /* Change references. */ + color[3] = 1.0f; + dispatch_size = int3(2); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.all_commands" << std::endl; + expected << " .state_set(6)" << std::endl; + expected << " .clear(color=(0.25, 0.5, 100, -2000), depth=0.5, stencil=0b11110000))" + << std::endl; + expected << " .stencil_set(write_mask=0b10000000, compare_mask=0b00001111, reference=0b10001111" + << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .bind_texture(0)" << std::endl; + expected << " .bind_texture_ref(0)" << std::endl; + expected << " .bind_image(-1)" << std::endl; + expected << " .bind_image_ref(-1)" << std::endl; + expected << " .bind_uniform_buf(-1)" << std::endl; + expected << " .bind_uniform_buf_ref(-1)" << std::endl; + expected << " .bind_storage_buf(-1)" << std::endl; + expected << " .bind_storage_buf_ref(-1)" << std::endl; + expected << " .push_constant(1, data=(1, 1, 1, 0))" << std::endl; + expected << " .push_constant(1, data=(1, 1, 1, 1))" << std::endl; + expected << " .push_constant(0, data=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << " .draw(inst_len=1, vert_len=3, vert_first=0, res_id=0)" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .dispatch(1, 1, 1)" << std::endl; + expected << " .dispatch_ref(2, 2, 2)" << std::endl; + expected << " .barrier(4)" << std::endl; + + 
EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_all_commands) + +static void test_draw_pass_sub_ordering() +{ + PassSimple pass = {"test.sub_ordering"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.push_constant("test_pass", 1); + + PassSimple::Sub &sub1 = pass.sub("Sub1"); + sub1.push_constant("test_sub1", 11); + + PassSimple::Sub &sub2 = pass.sub("Sub2"); + sub2.push_constant("test_sub2", 21); + + /* Will execute after both sub. */ + pass.push_constant("test_pass", 2); + + /* Will execute after sub1. */ + sub2.push_constant("test_sub2", 22); + + /* Will execute before sub2. */ + sub1.push_constant("test_sub1", 12); + + /* Will execute before end of pass. */ + sub2.push_constant("test_sub2", 23); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sub_ordering" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .push_constant(-1, data=1)" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .push_constant(-1, data=11)" << std::endl; + expected << " .push_constant(-1, data=12)" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .push_constant(-1, data=21)" << std::endl; + expected << " .push_constant(-1, data=22)" << std::endl; + expected << " .push_constant(-1, data=23)" << std::endl; + expected << " .push_constant(-1, data=2)" << std::endl; + + EXPECT_EQ(result, expected.str()); +} +DRAW_TEST(draw_pass_sub_ordering) + +static void test_draw_pass_simple_draw() +{ + PassSimple pass = {"test.simple_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + /* Each draw procedural type uses a different batch. Groups are drawn in correct order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, 10, 1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, 20, 2, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, 30, 3, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, 40, 4, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, 50, 5, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, 60, 6, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, 70, 7, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.simple_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .draw(inst_len=1, vert_len=10, vert_first=1, res_id=1)" << std::endl; + expected << " .draw(inst_len=4, vert_len=20, vert_first=2, res_id=2)" << std::endl; + expected << " .draw(inst_len=2, vert_len=30, vert_first=3, res_id=3)" << std::endl; + expected << " .draw(inst_len=5, vert_len=40, vert_first=4, res_id=4)" << std::endl; + expected << " .draw(inst_len=1, vert_len=50, vert_first=5, res_id=5)" << std::endl; + expected << " .draw(inst_len=6, vert_len=60, vert_first=6, res_id=5)" << std::endl; + expected << " .draw(inst_len=3, vert_len=70, vert_first=7, res_id=6)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_simple_draw) + +static void test_draw_pass_multi_draw() +{ + PassMain pass = {"test.multi_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + /* Each draw procedural type uses a different batch. Groups are drawn in reverse order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.multi_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .draw_multi(3)" << std::endl; + expected << " .group(id=2, len=1)" << std::endl; + expected << " .proto(instance_len=1, resource_id=5, front_face)" << std::endl; + expected << " .group(id=1, len=15)" << std::endl; + expected << " .proto(instance_len=5, resource_id=4, back_face)" << std::endl; + expected << " .proto(instance_len=6, resource_id=5, front_face)" << std::endl; + expected << " .proto(instance_len=4, resource_id=2, front_face)" << std::endl; + expected << " .group(id=0, len=6)" << std::endl; + expected << " .proto(instance_len=3, resource_id=6, front_face)" << std::endl; + expected << " .proto(instance_len=2, resource_id=3, front_face)" << std::endl; + expected << " .proto(instance_len=1, resource_id=1, front_face)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_multi_draw) + +static void test_draw_pass_sortable() +{ + PassSortable pass = {"test.sortable"}; + pass.init(); + + pass.sub("Sub3", 3.0f); + pass.sub("Sub2", 2.0f); + pass.sub("Sub5", 4.0f); + pass.sub("Sub4", 3.0f); + pass.sub("Sub1", 1.0f); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sortable" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .Sub3" << std::endl; + expected << " .Sub4" << std::endl; + expected << " .Sub5" << 
std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_sortable) + +static void test_draw_resource_id_gen() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); + ResourceHandle handle1 = drw.resource_handle(obmat_1); + ResourceHandle handle2 = drw.resource_handle(obmat_1); + ResourceHandle handle3 = drw.resource_handle(obmat_2); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + StringRefNull expected = "2 1 1 1 1 3 3 1 1 1 1 1 3 2 2 2 2 2 2 1 1 1 "; + + { + /* Computed on CPU. */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + { + /* Same thing with PassMain (computed on GPU) */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + 
pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_resource_id_gen) + +static void test_draw_visibility() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); /* Default {0} always visible. */ + drw.resource_handle(obmat_1); /* No bounds, always visible. */ + drw.resource_handle(obmat_1, float3(3), float3(1)); /* Out of view. */ + drw.resource_handle(obmat_2, float3(0), float3(1)); /* Inside view. */ + drw.end_sync(); + + PassMain pass = {"test.visibility"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + Vector<uint32_t> expected_visibility = {0}; + + std::stringstream result; + for (auto val : debug.visibility) { + result << std::bitset<32>(val); + } + + EXPECT_EQ(result.str(), "11111111111111111111111111111011"); + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_visibility) + +static void test_draw_manager_sync() +{ + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + /* TODO find a way to create a minimum object to test resource handle creation on it. 
*/ + Manager drw; + + drw.begin_sync(); + drw.resource_handle(obmat_1); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + Manager::DataDebugOutput debug = drw.data_debug(); + + std::stringstream result; + for (const auto &val : debug.matrices) { + result << val; + } + for (const auto &val : debug.bounds) { + result << val; + } + for (const auto &val : debug.infos) { + result << val; + } + + std::stringstream expected; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 1.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 1.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 1.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( -0.500000, -0.000000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.500000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, -0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( -2.000000, 0.000000, -0.000000, -0.000000)" << std::endl; + expected << "( 0.000000, -2.000000, 0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -2.000000, 0.000000)" << 
std::endl; + expected << "( -0.000000, -0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 0.500000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.500000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 2.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 2.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 2.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(" << std::endl; + expected << ".bounding_corners[0](0.5, 0.5, 0.5)" << std::endl; + expected << ".bounding_corners[1](1, 0, 0)" << std::endl; + expected << ".bounding_corners[2](0, 1, 0)" << std::endl; + expected << ".bounding_corners[3](0, 0, 1)" << std::endl; + expected << ".sphere=(pos=(1, 1, 1), rad=0.866025" << std::endl; + expected << ")" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + + EXPECT_EQ(result.str(), expected.str()); + + DRW_shaders_free(); +} +DRAW_TEST(draw_manager_sync) + +} // namespace blender::draw diff --git a/source/blender/draw/tests/shaders_test.cc b/source/blender/draw/tests/shaders_test.cc index 2bc0c9af895..892fd999fb5 100644 --- a/source/blender/draw/tests/shaders_test.cc +++ 
b/source/blender/draw/tests/shaders_test.cc @@ -256,6 +256,7 @@ static void test_overlay_glsl_shaders() EXPECT_NE(OVERLAY_shader_uniform_color(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass(false), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass(true), nullptr); + EXPECT_NE(OVERLAY_shader_outline_prepass_curves(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass_gpencil(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass_pointcloud(), nullptr); EXPECT_NE(OVERLAY_shader_extra_grid(), nullptr); @@ -270,6 +271,7 @@ static void test_overlay_glsl_shaders() EXPECT_NE(OVERLAY_shader_particle_dot(), nullptr); EXPECT_NE(OVERLAY_shader_particle_shape(), nullptr); EXPECT_NE(OVERLAY_shader_sculpt_mask(), nullptr); + EXPECT_NE(OVERLAY_shader_sculpt_curves_selection(), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(false, false), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(false, true), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(true, false), nullptr); @@ -358,6 +360,8 @@ static void test_eevee_glsl_shaders_static() EXPECT_NE(EEVEE_shaders_volumes_integration_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_volumes_resolve_sh_get(false), nullptr); EXPECT_NE(EEVEE_shaders_volumes_resolve_sh_get(true), nullptr); + EXPECT_NE(EEVEE_shaders_volumes_resolve_comp_sh_get(false), nullptr); + EXPECT_NE(EEVEE_shaders_volumes_resolve_comp_sh_get(true), nullptr); EXPECT_NE(EEVEE_shaders_volumes_accum_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_studiolight_probe_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_studiolight_background_sh_get(), nullptr); @@ -397,6 +401,7 @@ static void test_basic_glsl_shaders() eGPUShaderConfig sh_cfg = static_cast<eGPUShaderConfig>(i); BASIC_shaders_depth_sh_get(sh_cfg); BASIC_shaders_pointcloud_depth_sh_get(sh_cfg); + BASIC_shaders_curves_depth_sh_get(sh_cfg); BASIC_shaders_depth_conservative_sh_get(sh_cfg); BASIC_shaders_pointcloud_depth_conservative_sh_get(sh_cfg); } |