OpenSubDiv: add support for an OpenGL evaluator

This evaluator is used in order to evaluate subdivision at render time, allowing for faster renders of meshes with a subdivision surface modifier placed at the last position in the modifier list. When evaluating the subsurf modifier, we detect whether we can delegate evaluation to the draw code. If so, the subdivision is first evaluated on the GPU using our own custom evaluator (only the coarse data needs to be initially sent to the GPU), then the buffers for the final `MeshBufferCache` are filled on the GPU using a set of compute shaders. However, some buffers are still filled on the CPU side, if doing so on the GPU is impractical (e.g. the line adjacency buffer used for x-ray, whose logic is hardly GPU compatible). This is done at the mesh buffer extraction level so that the result can be readily used in the various OpenGL engines, without having to write custom geometry or tessellation shaders. We use our own subdivision evaluation shaders, instead of OpenSubDiv's vanilla ones, in order to control the data layout and interpolation. For example, we store vertex colors as compressed 16-bit integers, while OpenSubDiv's default evaluator only works for float types. In order to still access the modified geometry on the CPU side, for use in modifiers or transform operators, a dedicated wrapper type, `ME_WRAPPER_TYPE_SUBD`, is added. Subdivision will be lazily evaluated via `BKE_object_get_evaluated_mesh` which will create such a wrapper if possible. If the final subdivision surface is not needed on the CPU side, `BKE_object_get_evaluated_mesh_no_subsurf` should be used. Enabling or disabling GPU subdivision can be done through the user preferences (under Viewport -> Subdivision). See patch description for benchmarks. Reviewed By: campbellbarton, jbakker, fclem, brecht, #eevee_viewport Differential Revision: https://developer.blender.org/D12406
author: Kévin Dietrich <kevin.dietrich@mailoo.org> 2021-12-27 18:34:47 +0300
committer: Kévin Dietrich <kevin.dietrich@mailoo.org> 2021-12-27 18:35:54 +0300
commit: eed45d2a239a2a18a2420ba15dfb55e0f8dc5630 (patch)
tree: aa55ce966caa8e28db4853d7d755003ed249805b /source/blender/draw
parent: 31e120ef4997583332aa9b5af93521e7e666e9f3 (diff)
42 files changed, 5086 insertions, 102 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 821b6025fff..eea3adc440a 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -44,9 +44,11 @@ set(INC
   ../../../intern/atomic
   ../../../intern/glew-mx
   ../../../intern/guardedalloc
+  ../../../intern/opensubdiv
 
   # dna_type_offsets.h
   ${CMAKE_CURRENT_BINARY_DIR}/../makesdna/intern
+  ${OPENSUBDIV_INCLUDE_DIRS}
 )
 
 set(SRC
@@ -91,6 +93,7 @@ set(SRC
   intern/draw_cache_impl_metaball.c
   intern/draw_cache_impl_particles.c
   intern/draw_cache_impl_pointcloud.c
+  intern/draw_cache_impl_subdivision.cc
   intern/draw_cache_impl_volume.c
   intern/draw_color_management.cc
   intern/draw_common.c
@@ -209,6 +212,7 @@ set(SRC
   intern/draw_manager_testing.h
   intern/draw_manager_text.h
   intern/draw_shader.h
+  intern/draw_subdivision.h
   intern/draw_texture_pool.h
   intern/draw_view.h
   intern/draw_view_data.h
@@ -372,6 +376,18 @@ data_to_c_simple(intern/shaders/common_view_lib.glsl SRC)
 data_to_c_simple(intern/shaders/common_fxaa_lib.glsl SRC)
 data_to_c_simple(intern/shaders/common_smaa_lib.glsl SRC)
 data_to_c_simple(intern/shaders/common_fullscreen_vert.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_custom_data_interp_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_ibo_lines_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_ibo_tris_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_lib.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_normals_accumulate_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_normals_finalize_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_patch_evaluation_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_lnor_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl SRC)
 
 data_to_c_simple(engines/gpencil/shaders/gpencil_frag.glsl SRC)
 data_to_c_simple(engines/gpencil/shaders/gpencil_vert.glsl SRC)
diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h
index 98e166ac3a7..132f66ecb1e 100644
--- a/source/blender/draw/DRW_engine.h
+++ b/source/blender/draw/DRW_engine.h
@@ -191,6 +191,10 @@ void DRW_xr_drawing_end(void);
 
 /* For garbage collection */
 void DRW_cache_free_old_batches(struct Main *bmain);
+void DRW_cache_free_old_subdiv(void);
+
+/* For the OpenGL evaluators and garbage collected subdivision data. */
+void DRW_subdiv_free(void);
 
 /* Never use this. Only for closing blender. */
 void DRW_opengl_context_enable_ex(bool restore);
diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c
index 2345a110134..a754e81b949 100644
--- a/source/blender/draw/engines/overlay/overlay_armature.c
+++ b/source/blender/draw/engines/overlay/overlay_armature.c
@@ -589,7 +589,7 @@ static void drw_shgroup_bone_custom_wire(ArmatureDrawContext *ctx,
                                          Object *custom)
 {
   /* See comments in #drw_shgroup_bone_custom_solid. */
-  Mesh *mesh = BKE_object_get_evaluated_mesh(custom);
+  Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(custom);
   if (mesh == NULL) {
     return;
   }
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 03fb3b92277..1110658e3b2 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -923,7 +923,7 @@ GPUBatch *DRW_cache_object_surface_get(Object *ob)
 
 GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob)
 {
-  Mesh *me = BKE_object_get_evaluated_mesh(ob);
+  Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   short type = (me != NULL) ? OB_MESH : ob->type;
 
   switch (type) {
@@ -950,7 +950,7 @@ int DRW_cache_object_material_count_get(struct Object *ob)
 {
   short type = ob->type;
 
-  Mesh *me = BKE_object_get_evaluated_mesh(ob);
+  Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (me != NULL && type != OB_POINTCLOUD) {
     /* Some object types can have one data type in ob->data, but will be rendered as mesh.
      * For point clouds this never happens. Ideally this check would happen at another level
@@ -3021,7 +3021,7 @@ GPUBatch *DRW_cache_surf_surface_get(Object *ob)
   BLI_assert(ob->type == OB_SURF);
 
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_surface(mesh_eval);
   }
@@ -3034,7 +3034,7 @@ GPUBatch *DRW_cache_surf_edge_wire_get(Object *ob)
   BLI_assert(ob->type == OB_SURF);
 
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_loose_edges(mesh_eval);
   }
@@ -3047,7 +3047,7 @@ GPUBatch *DRW_cache_surf_face_wireframe_get(Object *ob)
   BLI_assert(ob->type == OB_SURF);
 
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_wireframes_face(mesh_eval);
   }
@@ -3059,7 +3059,7 @@ GPUBatch *DRW_cache_surf_edge_detection_get(Object *ob, bool *r_is_manifold)
 {
   BLI_assert(ob->type == OB_SURF);
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_edge_detection(mesh_eval, r_is_manifold);
   }
@@ -3072,7 +3072,7 @@ GPUBatch *DRW_cache_surf_loose_edges_get(Object *ob)
   BLI_assert(ob->type == OB_SURF);
 
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_loose_edges(mesh_eval);
   }
@@ -3089,7 +3089,7 @@ GPUBatch **DRW_cache_surf_surface_shaded_get(Object *ob,
   BLI_assert(ob->type == OB_SURF);
 
   struct Curve *cu = ob->data;
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   if (mesh_eval != NULL) {
     return DRW_mesh_batch_cache_get_surface_shaded(mesh_eval, gpumat_array, gpumat_array_len);
   }
@@ -3382,7 +3382,7 @@ GPUBatch *DRW_cache_cursor_get(bool crosshair_lines)
 
 void drw_batch_cache_validate(Object *ob)
 {
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   switch (ob->type) {
     case OB_MESH:
       DRW_mesh_batch_cache_validate((Mesh *)ob->data);
@@ -3431,7 +3431,7 @@ void drw_batch_cache_generate_requested(Object *ob)
                            DRW_object_use_hide_faces(ob)) ||
                           ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob))));
 
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   switch (ob->type) {
     case OB_MESH:
       DRW_mesh_batch_cache_create_requested(
@@ -3470,7 +3470,7 @@ void drw_batch_cache_generate_requested_evaluated_mesh(Object *ob)
                            DRW_object_use_hide_faces(ob)) ||
                           ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob))));
 
-  Mesh *mesh = BKE_object_get_evaluated_mesh(ob);
+  Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(ob);
   DRW_mesh_batch_cache_create_requested(DST.task_graph, ob, mesh, scene, is_paint_mode, use_hide);
 }
 
@@ -3481,7 +3481,7 @@ void drw_batch_cache_generate_requested_delayed(Object *ob)
 
 void DRW_batch_cache_free_old(Object *ob, int ctime)
 {
-  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+  struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
 
   switch (ob->type) {
     case OB_MESH:
diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h
index ba42cdf66e7..6de9788b434 100644
--- a/source/blender/draw/intern/draw_cache_extract.h
+++ b/source/blender/draw/intern/draw_cache_extract.h
@@ -22,6 +22,7 @@
 
 #pragma once
 
+struct DRWSubdivCache;
 struct TaskGraph;
 
 #include "DNA_customdata_types.h"
@@ -244,6 +245,13 @@ typedef enum DRWBatchFlag {
 
 BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of bit fields");
 
+typedef struct MeshExtractLooseGeom { /* Stored as #MeshBufferCache.loose_geom. */
+  int edge_len; /* NOTE(review): presumably the number of entries in `edges` -- confirm. */
+  int vert_len; /* NOTE(review): presumably the number of entries in `verts` -- confirm. */
+  int *verts;
+  int *edges;
+} MeshExtractLooseGeom;
+
 /**
  * Data that are kept around between extractions to reduce rebuilding time.
  *
@@ -252,12 +260,7 @@ BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of b
 typedef struct MeshBufferCache {
   MeshBufferList buff;
 
-  struct {
-    int edge_len;
-    int vert_len;
-    int *verts;
-    int *edges;
-  } loose_geom;
+  MeshExtractLooseGeom loose_geom;
 
   struct {
     int *tri_first_index;
@@ -283,6 +286,8 @@ typedef struct MeshBatchCache {
 
   GPUBatch **surface_per_mat;
 
+  struct DRWSubdivCache *subdiv_cache;
+
   DRWBatchFlag batch_requested; /* DRWBatchFlag */
   DRWBatchFlag batch_ready;     /* DRWBatchFlag */
 
@@ -332,9 +337,14 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
                                         const bool do_uvedit,
                                         const bool use_subsurf_fdots,
                                         const Scene *scene,
-                                        const ToolSettings *ts,
+                                        const struct ToolSettings *ts,
                                         const bool use_hide);
 
+void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+                                               MeshBufferCache *mbc,
+                                               struct DRWSubdivCache *subdiv_cache,
+                                               const struct ToolSettings *ts);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc
index 485b803310c..383a3b05b67 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh.cc
+++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc
@@ -42,6 +42,7 @@
 
 #include "draw_cache_extract.h"
 #include "draw_cache_inline.h"
+#include "draw_subdivision.h"
 
 #include "mesh_extractors/extract_mesh.h"
 
@@ -783,6 +784,99 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
 
 /** \} */
 
+/* ---------------------------------------------------------------------- */
+/** \name Subdivision Extract Loop
+ * \{ */
+/** Run the subdivision (`*_subdiv`) callbacks of every extractor whose buffer was requested in
+ * `mbc->buff`. Returns before initializing any render data when no buffer was requested. */
+static void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+                                                      MeshBufferCache *mbc,
+                                                      DRWSubdivCache *subdiv_cache,
+                                                      const ToolSettings *ts)
+{
+  /* Create an array containing all the extractors that need to be executed. */
+  ExtractorRunDatas extractors;
+
+  MeshBufferList *mbuflist = &mbc->buff;
+
+#define EXTRACT_ADD_REQUESTED(type, name) \
+  do { \
+    if (DRW_##type##_requested(mbuflist->type.name)) { \
+      const MeshExtract *extractor = &extract_##name; \
+      extractors.append(extractor); \
+    } \
+  } while (0)
+
+  /* The order in which extractors are added to the list matters somewhat, as some buffers are
+   * reused when building others. */
+  EXTRACT_ADD_REQUESTED(ibo, tris);
+  EXTRACT_ADD_REQUESTED(vbo, pos_nor);
+  EXTRACT_ADD_REQUESTED(vbo, lnor);
+  for (int i = 0; i < GPU_MAX_ATTR; i++) {
+    EXTRACT_ADD_REQUESTED(vbo, attr[i]);
+  }
+
+  /* We use only one extractor for face dots, as the work is done in a single compute shader. */
+  if (DRW_vbo_requested(mbuflist->vbo.fdots_nor) || DRW_vbo_requested(mbuflist->vbo.fdots_pos) ||
+      DRW_ibo_requested(mbuflist->ibo.fdots)) {
+    extractors.append(&extract_fdots_pos);
+  }
+
+  EXTRACT_ADD_REQUESTED(ibo, lines);
+  EXTRACT_ADD_REQUESTED(ibo, edituv_points);
+  EXTRACT_ADD_REQUESTED(ibo, edituv_tris);
+  EXTRACT_ADD_REQUESTED(ibo, edituv_lines);
+  EXTRACT_ADD_REQUESTED(vbo, vert_idx);
+  EXTRACT_ADD_REQUESTED(vbo, edge_idx);
+  EXTRACT_ADD_REQUESTED(vbo, poly_idx);
+  EXTRACT_ADD_REQUESTED(vbo, edge_fac);
+  EXTRACT_ADD_REQUESTED(ibo, points);
+  EXTRACT_ADD_REQUESTED(vbo, edit_data);
+  EXTRACT_ADD_REQUESTED(vbo, edituv_data);
+  /* Make sure UVs are computed before the edituv stretch buffers. */
+  EXTRACT_ADD_REQUESTED(vbo, uv);
+  EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_area);
+  EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle);
+  EXTRACT_ADD_REQUESTED(ibo, lines_adjacency);
+  EXTRACT_ADD_REQUESTED(vbo, vcol);
+  EXTRACT_ADD_REQUESTED(vbo, weights);
+  EXTRACT_ADD_REQUESTED(vbo, sculpt_data);
+
+#undef EXTRACT_ADD_REQUESTED
+
+  if (extractors.is_empty()) {
+    return;
+  }
+
+  MeshRenderData mr;
+  draw_subdiv_init_mesh_render_data(subdiv_cache, &mr, ts);
+  mesh_render_data_update_loose_geom(&mr, mbc, MR_ITER_LEDGE | MR_ITER_LVERT, MR_DATA_LOOSE_GEOM);
+  /* NOTE(review): `data_offset` is never advanced in the loop below, so every extractor is
+   * handed the start of `data_stack` -- confirm this sharing is intended. */
+  void *data_stack = MEM_mallocN(extractors.data_size_total(), __func__);
+  uint32_t data_offset = 0;
+  for (const ExtractorRunData &run_data : extractors) {
+    const MeshExtract *extractor = run_data.extractor;
+    void *buffer = mesh_extract_buffer_get(extractor, mbuflist);
+    void *data = POINTER_OFFSET(data_stack, data_offset);
+    /* `init_subdiv` is called unconditionally, the remaining callbacks are optional.
+     * NOTE(review): assumes every extractor added above provides `init_subdiv` -- confirm. */
+    extractor->init_subdiv(subdiv_cache, &mr, cache, buffer, data);
+
+    if (extractor->iter_subdiv) {
+      extractor->iter_subdiv(subdiv_cache, &mr, data);
+    }
+
+    if (extractor->iter_loose_geom_subdiv) {
+      extractor->iter_loose_geom_subdiv(subdiv_cache, &mr, &mbc->loose_geom, buffer, data);
+    }
+
+    if (extractor->finish_subdiv) {
+      extractor->finish_subdiv(subdiv_cache, buffer, data);
+    }
+  }
+  MEM_freeN(data_stack);
+}
+
+/** \} */
+
 }  // namespace blender::draw
 
 extern "C" {
@@ -818,4 +912,12 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
                                                     use_hide);
 }
 
+void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+                                               MeshBufferCache *mbc,
+                                               DRWSubdivCache *subdiv_cache,
+                                               const ToolSettings *ts)
+{ /* `extern "C"` entry point: forwards unchanged to the `blender::draw` implementation. */
+  blender::draw::mesh_buffer_cache_create_requested_subdiv(cache, mbc, subdiv_cache, ts);
+}
+
 }  // extern "C"
diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c
index 82b3b5aee41..1e5ffc14911 100644
--- a/source/blender/draw/intern/draw_cache_impl_mesh.c
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.c
@@ -54,6 +54,7 @@
 #include "BKE_object_deform.h"
 #include "BKE_paint.h"
 #include "BKE_pbvh.h"
+#include "BKE_subdiv_modifier.h"
 
 #include "atomic_ops.h"
 
@@ -69,6 +70,7 @@
 
 #include "draw_cache_extract.h"
 #include "draw_cache_inline.h"
+#include "draw_subdivision.h"
 
 #include "draw_cache_impl.h" /* own include */
 
@@ -380,6 +382,7 @@ static void drw_mesh_attributes_add_request(DRW_MeshAttributes *attrs,
 BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me)
 {
   switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+    case ME_WRAPPER_TYPE_SUBD:
     case ME_WRAPPER_TYPE_MDATA:
       return &me->ldata;
       break;
@@ -395,6 +398,7 @@ BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me)
 BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me)
 {
   switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+    case ME_WRAPPER_TYPE_SUBD:
     case ME_WRAPPER_TYPE_MDATA:
       return &me->pdata;
       break;
@@ -410,6 +414,7 @@ BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me)
 BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me)
 {
   switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+    case ME_WRAPPER_TYPE_SUBD:
     case ME_WRAPPER_TYPE_MDATA:
       return &me->edata;
       break;
@@ -425,6 +430,7 @@ BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me)
 BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me)
 {
   switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+    case ME_WRAPPER_TYPE_SUBD:
     case ME_WRAPPER_TYPE_MDATA:
       return &me->vdata;
       break;
@@ -1037,6 +1043,15 @@ static void mesh_buffer_cache_clear(MeshBufferCache *mbc)
   mbc->poly_sorted.visible_tri_len = 0;
 }
 
+/* Free the subdivision cache held by `cache`, if there is one, and reset the pointer so the
+ * function can safely be called again. */
+static void mesh_batch_cache_free_subdiv_cache(MeshBatchCache *cache)
+{
+  if (cache->subdiv_cache == NULL) {
+    /* Nothing was allocated. */
+    return;
+  }
+
+  /* Release the cache contents first, then the cache struct itself. */
+  draw_subdiv_cache_free(cache->subdiv_cache);
+  MEM_freeN(cache->subdiv_cache);
+  cache->subdiv_cache = NULL;
+}
+
 static void mesh_batch_cache_clear(Mesh *me)
 {
   MeshBatchCache *cache = me->runtime.batch_cache;
@@ -1064,6 +1079,8 @@ static void mesh_batch_cache_clear(Mesh *me)
 
   cache->batch_ready = 0;
   drw_mesh_weight_state_clear(&cache->weight_state);
+
+  mesh_batch_cache_free_subdiv_cache(cache);
 }
 
 void DRW_mesh_batch_cache_free(Mesh *me)
@@ -1693,6 +1710,10 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
 
   const bool do_uvcage = is_editmode && !me->edit_mesh->mesh_eval_final->runtime.is_original;
 
+  const int required_mode = BKE_subsurf_modifier_eval_required_mode(DRW_state_is_scene_render(),
+                                                                    is_editmode);
+  const bool do_subdivision = BKE_subsurf_modifier_can_do_gpu_subdiv(scene, ob, required_mode);
+
   MeshBufferList *mbuflist = &cache->final.buff;
 
   /* Initialize batches and request VBO's & IBO's. */
@@ -2038,6 +2059,15 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
                                        true);
   }
 
+  if (do_subdivision) {
+    DRW_create_subdivision(scene, ob, me, cache, &cache->final, ts);
+  }
+  else {
+    /* The subsurf modifier may have been recently removed, or another modifier was added after it,
+     * so free any potential subdivision cache as it is not needed anymore. */
+    mesh_batch_cache_free_subdiv_cache(cache);
+  }
+
   mesh_buffer_cache_create_requested(task_graph,
                                      cache,
                                      &cache->final,
diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc
new file mode 100644
index 00000000000..5533130212e
--- /dev/null
+++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc
@@ -0,0 +1,1932 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2021, Blender Foundation.
+ */
+
+#include "draw_subdivision.h"
+
+#include "DNA_mesh_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+
+#include "BKE_editmesh.h"
+#include "BKE_modifier.h"
+#include "BKE_object.h"
+#include "BKE_scene.h"
+#include "BKE_subdiv.h"
+#include "BKE_subdiv_eval.h"
+#include "BKE_subdiv_foreach.h"
+#include "BKE_subdiv_mesh.h"
+#include "BKE_subdiv_modifier.h"
+
+#include "BLI_linklist.h"
+
+#include "BLI_string.h"
+
+#include "PIL_time.h"
+
+#include "DRW_engine.h"
+#include "DRW_render.h"
+
+#include "GPU_capabilities.h"
+#include "GPU_compute.h"
+#include "GPU_index_buffer.h"
+#include "GPU_state.h"
+#include "GPU_vertex_buffer.h"
+
+#include "opensubdiv_capi.h"
+#include "opensubdiv_capi_type.h"
+#include "opensubdiv_converter_capi.h"
+#include "opensubdiv_evaluator_capi.h"
+#include "opensubdiv_topology_refiner_capi.h"
+
+#include "draw_cache_extract.h"
+#include "draw_cache_impl.h"
+#include "draw_cache_inline.h"
+#include "mesh_extractors/extract_mesh.h"
+
+extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[];
+extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[];
+extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[];
+extern "C" char datatoc_common_subdiv_lib_glsl[];
+extern "C" char datatoc_common_subdiv_normals_accumulate_comp_glsl[];
+extern "C" char datatoc_common_subdiv_normals_finalize_comp_glsl[];
+extern "C" char datatoc_common_subdiv_patch_evaluation_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edge_fac_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_lnor_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_sculpt_data_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl[];
+/* Identifiers of the subdivision compute shaders, used as indices into `g_subdiv_shaders`.
+ * Several identifiers share one GLSL source, see `get_shader_code()`. */
+enum {
+  SHADER_BUFFER_LINES,
+  SHADER_BUFFER_LINES_LOOSE,
+  SHADER_BUFFER_EDGE_FAC,
+  SHADER_BUFFER_LNOR,
+  SHADER_BUFFER_TRIS,
+  SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS,
+  SHADER_BUFFER_NORMALS_ACCUMULATE,
+  SHADER_BUFFER_NORMALS_FINALIZE,
+  SHADER_PATCH_EVALUATION,
+  SHADER_PATCH_EVALUATION_LIMIT_NORMALS,
+  SHADER_PATCH_EVALUATION_FVAR,
+  SHADER_PATCH_EVALUATION_FACE_DOTS,
+  SHADER_COMP_CUSTOM_DATA_INTERP_1D,
+  SHADER_COMP_CUSTOM_DATA_INTERP_2D,
+  SHADER_COMP_CUSTOM_DATA_INTERP_3D,
+  SHADER_COMP_CUSTOM_DATA_INTERP_4D,
+  SHADER_BUFFER_SCULPT_DATA,
+  SHADER_BUFFER_UV_STRETCH_ANGLE,
+  SHADER_BUFFER_UV_STRETCH_AREA,
+
+  NUM_SHADERS, /* Count of the identifiers above; sizes `g_subdiv_shaders`. */
+};
+/* Shader cache indexed by the `SHADER_*` identifiers; entries are created on first request. */
+static GPUShader *g_subdiv_shaders[NUM_SHADERS];
+
+/* Map a `SHADER_*` identifier to the embedded GLSL compute source it is built from.
+ * Returns null when `shader_type` is not one of the known identifiers. */
+static const char *get_shader_code(int shader_type)
+{
+  const char *source = nullptr;
+
+  switch (shader_type) {
+    case SHADER_BUFFER_LINES:
+    case SHADER_BUFFER_LINES_LOOSE:
+      source = datatoc_common_subdiv_ibo_lines_comp_glsl;
+      break;
+    case SHADER_BUFFER_EDGE_FAC:
+      source = datatoc_common_subdiv_vbo_edge_fac_comp_glsl;
+      break;
+    case SHADER_BUFFER_LNOR:
+      source = datatoc_common_subdiv_vbo_lnor_comp_glsl;
+      break;
+    case SHADER_BUFFER_TRIS:
+    case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS:
+      source = datatoc_common_subdiv_ibo_tris_comp_glsl;
+      break;
+    case SHADER_BUFFER_NORMALS_ACCUMULATE:
+      source = datatoc_common_subdiv_normals_accumulate_comp_glsl;
+      break;
+    case SHADER_BUFFER_NORMALS_FINALIZE:
+      source = datatoc_common_subdiv_normals_finalize_comp_glsl;
+      break;
+    case SHADER_PATCH_EVALUATION:
+    case SHADER_PATCH_EVALUATION_LIMIT_NORMALS:
+    case SHADER_PATCH_EVALUATION_FVAR:
+    case SHADER_PATCH_EVALUATION_FACE_DOTS:
+      source = datatoc_common_subdiv_patch_evaluation_comp_glsl;
+      break;
+    case SHADER_COMP_CUSTOM_DATA_INTERP_1D:
+    case SHADER_COMP_CUSTOM_DATA_INTERP_2D:
+    case SHADER_COMP_CUSTOM_DATA_INTERP_3D:
+    case SHADER_COMP_CUSTOM_DATA_INTERP_4D:
+      source = datatoc_common_subdiv_custom_data_interp_comp_glsl;
+      break;
+    case SHADER_BUFFER_SCULPT_DATA:
+      source = datatoc_common_subdiv_vbo_sculpt_data_comp_glsl;
+      break;
+    case SHADER_BUFFER_UV_STRETCH_ANGLE:
+      source = datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl;
+      break;
+    case SHADER_BUFFER_UV_STRETCH_AREA:
+      source = datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl;
+      break;
+    default:
+      /* Unknown identifier: keep the null result. */
+      break;
+  }
+
+  return source;
+}
+
+/* Map a `SHADER_*` identifier to the name passed to the GPU module when creating the shader.
+ * Returns null when `shader_type` is not one of the known identifiers. */
+static const char *get_shader_name(int shader_type)
+{
+  const char *name = nullptr;
+
+  switch (shader_type) {
+    case SHADER_BUFFER_LINES:
+      name = "subdiv lines build";
+      break;
+    case SHADER_BUFFER_LINES_LOOSE:
+      name = "subdiv lines loose build";
+      break;
+    case SHADER_BUFFER_LNOR:
+      name = "subdiv lnor build";
+      break;
+    case SHADER_BUFFER_EDGE_FAC:
+      name = "subdiv edge fac build";
+      break;
+    case SHADER_BUFFER_TRIS:
+    case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS:
+      /* Both triangle variants report the same name. */
+      name = "subdiv tris";
+      break;
+    case SHADER_BUFFER_NORMALS_ACCUMULATE:
+      name = "subdiv normals accumulate";
+      break;
+    case SHADER_BUFFER_NORMALS_FINALIZE:
+      name = "subdiv normals finalize";
+      break;
+    case SHADER_PATCH_EVALUATION:
+      name = "subdiv patch evaluation";
+      break;
+    case SHADER_PATCH_EVALUATION_LIMIT_NORMALS:
+      name = "subdiv patch evaluation limit normals";
+      break;
+    case SHADER_PATCH_EVALUATION_FVAR:
+      name = "subdiv patch evaluation face-varying";
+      break;
+    case SHADER_PATCH_EVALUATION_FACE_DOTS:
+      name = "subdiv patch evaluation face dots";
+      break;
+    case SHADER_COMP_CUSTOM_DATA_INTERP_1D:
+      name = "subdiv custom data interp 1D";
+      break;
+    case SHADER_COMP_CUSTOM_DATA_INTERP_2D:
+      name = "subdiv custom data interp 2D";
+      break;
+    case SHADER_COMP_CUSTOM_DATA_INTERP_3D:
+      name = "subdiv custom data interp 3D";
+      break;
+    case SHADER_COMP_CUSTOM_DATA_INTERP_4D:
+      name = "subdiv custom data interp 4D";
+      break;
+    case SHADER_BUFFER_SCULPT_DATA:
+      name = "subdiv sculpt data";
+      break;
+    case SHADER_BUFFER_UV_STRETCH_ANGLE:
+      name = "subdiv uv stretch angle";
+      break;
+    case SHADER_BUFFER_UV_STRETCH_AREA:
+      name = "subdiv uv stretch area";
+      break;
+    default:
+      /* Unknown identifier: keep the null result. */
+      break;
+  }
+
+  return name;
+}
+
+/* Return the patch evaluation compute shader for `shader_type`, compiling and caching it in
+ * `g_subdiv_shaders` on first use. The library code given to the compiler is the OpenSubdiv
+ * GLSL patch basis source followed by our own subdivision library. */
+static GPUShader *get_patch_evaluation_shader(int shader_type)
+{
+  if (g_subdiv_shaders[shader_type] != nullptr) {
+    /* Already compiled by an earlier call. */
+    return g_subdiv_shaders[shader_type];
+  }
+
+  const char *compute_code = get_shader_code(shader_type);
+
+  /* Every variant shares the first two defines, only the last one selects the variant. */
+  const char *defines = nullptr;
+  switch (shader_type) {
+    case SHADER_PATCH_EVALUATION_LIMIT_NORMALS:
+      defines =
+          "#define OSD_PATCH_BASIS_GLSL\n"
+          "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+          "#define LIMIT_NORMALS\n";
+      break;
+    case SHADER_PATCH_EVALUATION_FVAR:
+      defines =
+          "#define OSD_PATCH_BASIS_GLSL\n"
+          "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+          "#define FVAR_EVALUATION\n";
+      break;
+    case SHADER_PATCH_EVALUATION_FACE_DOTS:
+      defines =
+          "#define OSD_PATCH_BASIS_GLSL\n"
+          "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+          "#define FDOTS_EVALUATION\n";
+      break;
+    default:
+      defines =
+          "#define OSD_PATCH_BASIS_GLSL\n"
+          "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
+      break;
+  }
+
+  /* Merge OpenSubdiv library code with our own library code into one NUL-terminated string. */
+  const char *osd_basis = openSubdiv_getGLSLPatchBasisSource();
+  const size_t osd_basis_len = strlen(osd_basis);
+  const size_t own_lib_len = strlen(datatoc_common_subdiv_lib_glsl);
+  char *merged_lib = static_cast<char *>(
+      MEM_mallocN(osd_basis_len + own_lib_len + 1, "subdiv patch evaluation library code"));
+  memcpy(merged_lib, osd_basis, osd_basis_len);
+  /* The `+ 1` also copies the terminating NUL of our library. */
+  memcpy(merged_lib + osd_basis_len, datatoc_common_subdiv_lib_glsl, own_lib_len + 1);
+
+  GPUShader *shader = GPU_shader_create_compute(
+      compute_code, merged_lib, defines, get_shader_name(shader_type));
+  g_subdiv_shaders[shader_type] = shader;
+
+  MEM_freeN(merged_lib);
+
+  return shader;
+}
+
+/* Return the compute shader for `shader_type`, compiling and caching it on first use.
+ * Patch evaluation types are delegated to `get_patch_evaluation_shader()`, which does not take
+ * `defines`. For the other types `defines` is only used by the call that compiles the shader;
+ * later calls return the cached shader whatever `defines` they pass. */
+static GPUShader *get_subdiv_shader(int shader_type, const char *defines)
+{
+  switch (shader_type) {
+    case SHADER_PATCH_EVALUATION:
+    case SHADER_PATCH_EVALUATION_LIMIT_NORMALS:
+    case SHADER_PATCH_EVALUATION_FVAR:
+    case SHADER_PATCH_EVALUATION_FACE_DOTS:
+      return get_patch_evaluation_shader(shader_type);
+    default:
+      break;
+  }
+
+  GPUShader *shader = g_subdiv_shaders[shader_type];
+  if (shader == nullptr) {
+    shader = GPU_shader_create_compute(get_shader_code(shader_type),
+                                       datatoc_common_subdiv_lib_glsl,
+                                       defines,
+                                       get_shader_name(shader_type));
+    g_subdiv_shaders[shader_type] = shader;
+  }
+  return shader;
+}
+
+/* -------------------------------------------------------------------- */
+/** Vertex formats used for data transfer from OpenSubdiv, and for data processing on our side.
+ * \{ */
+
+/* Vertex format with a single two-component float attribute named "uvs". */
+static GPUVertFormat *get_uvs_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    /* Attributes were already registered by an earlier call. */
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "uvs", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+  return &format;
+}
+
+/* Vertex format matching `OpenSubdiv::Osd::PatchArray`: six single-component integer
+ * attributes. */
+static GPUVertFormat *get_patch_array_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    /* Attributes were already registered by an earlier call. */
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "regDesc", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "desc", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "numPatches", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "indexBase", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "stride", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "primitiveIdBase", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  return &format;
+}
+
+/* Vertex format matching `PatchTable::PatchHandle`: three single-component integer
+ * attributes. */
+static GPUVertFormat *get_patch_handle_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    /* Attributes were already registered by an earlier call. */
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "vertex_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "array_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "patch_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  return &format;
+}
+
+/* Vertex format used for the quad-tree nodes of the PatchMap: a single attribute `child` made of
+ * four 32-bit unsigned integers. Lazily initialized on the first call. */
+static GPUVertFormat *get_quadtree_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "child", GPU_COMP_U32, 4, GPU_FETCH_INT);
+  return &format;
+}
+
+/* Vertex format for `OpenSubdiv::Osd::PatchParam`. It is not really used, it only exists to make
+ * sure that the #GPUVertBuf wrapping the OpenSubdiv patch param buffer is valid. The format holds
+ * a single attribute `data` made of three 32-bit floats. */
+static GPUVertFormat *get_patch_param_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "data", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+  return &format;
+}
+
+/* Vertex format for the patches' vertices index buffer: a single 32-bit integer attribute named
+ * `data`. Lazily initialized on the first call. */
+static GPUVertFormat *get_patch_index_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "data", GPU_COMP_I32, 1, GPU_FETCH_INT);
+  return &format;
+}
+
+/* Vertex format for the OpenSubdiv vertex buffer: a single attribute `pos` made of 32-bit floats.
+ * Lazily initialized on the first call. */
+static GPUVertFormat *get_subdiv_vertex_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  /* Positions are declared with 4 components instead of 3, this accounts for the padding in the
+   * compute shaders, where vec3 is promoted to vec4. */
+  GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+  return &format;
+}
+
+/* Patch coordinate stored in two 32-bit words: the ptex face index, and both UV coordinates
+ * packed in a single integer. Values are built by #make_patch_coord, and the layout has to agree
+ * with the vertex format returned by #get_blender_patch_coords_format. */
+typedef struct CompressedPatchCoord {
+  /* Index of the ptex face this coordinate belongs to. */
+  int ptex_face_index;
+  /* UV coordinate encoded as u << 16 | v, where u and v are quantized on 16-bits. */
+  unsigned int encoded_uv;
+} CompressedPatchCoord;
+
+/* Build a #CompressedPatchCoord from a ptex face index and a UV coordinate. Each of `u` and `v` is
+ * multiplied by 65535 and truncated to an unsigned integer, `u` is then stored in the upper 16
+ * bits of the encoded value and `v` in the lower ones.
+ * NOTE(review): presumably `u` and `v` are expected to be within [0, 1] so that each quantized
+ * value fits in 16 bits, this is not checked here. */
+MINLINE CompressedPatchCoord make_patch_coord(int ptex_face_index, float u, float v)
+{
+  const unsigned int quantized_u = static_cast<unsigned int>(u * 65535.0f);
+  const unsigned int quantized_v = static_cast<unsigned int>(v * 65535.0f);
+
+  CompressedPatchCoord result;
+  result.ptex_face_index = ptex_face_index;
+  result.encoded_uv = (quantized_u << 16) | quantized_v;
+  return result;
+}
+
+/* Vertex format used for the #CompressedPatchCoord: two single-component 32-bit unsigned integer
+ * attributes. Lazily initialized on the first call. */
+static GPUVertFormat *get_blender_patch_coords_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  /* WARNING! Adjust #CompressedPatchCoord accordingly. */
+  GPU_vertformat_attr_add(&format, "ptex_face_index", GPU_COMP_U32, 1, GPU_FETCH_INT);
+  GPU_vertformat_attr_add(&format, "uv", GPU_COMP_U32, 1, GPU_FETCH_INT);
+  return &format;
+}
+
+/* Vertex format made of a single 32-bit unsigned integer attribute (which is named `color`). In
+ * this file it is used for the origindex, polygon offset, and vertex adjacency buffers. Lazily
+ * initialized on the first call. */
+static GPUVertFormat *get_origindex_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    return &format;
+  }
+  GPU_vertformat_attr_add(&format, "color", GPU_COMP_U32, 1, GPU_FETCH_INT);
+  return &format;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities to initialize a OpenSubdiv_Buffer for a GPUVertBuf.
+ * \{ */
+
+/* #OpenSubdiv_Buffer callback: call #GPU_vertbuf_use on the #GPUVertBuf stored as the buffer's
+ * data. */
+static void vertbuf_bind_gpu(const OpenSubdiv_Buffer *buffer)
+{
+  GPU_vertbuf_use((GPUVertBuf *)(buffer->data));
+}
+
+/* #OpenSubdiv_Buffer callback: allocate `len` elements for the wrapped #GPUVertBuf and return the
+ * pointer given by #GPU_vertbuf_get_data for it. */
+static void *vertbuf_alloc(const OpenSubdiv_Buffer *interface, const uint len)
+{
+  GPUVertBuf *vbo = (GPUVertBuf *)(interface->data);
+  GPU_vertbuf_data_alloc(vbo, len);
+  return GPU_vertbuf_get_data(vbo);
+}
+
+/* #OpenSubdiv_Buffer callback: allocate `len` elements for the wrapped #GPUVertBuf, without
+ * returning any pointer to the data. */
+static void vertbuf_device_alloc(const OpenSubdiv_Buffer *interface, const uint len)
+{
+  GPUVertBuf *vbo = (GPUVertBuf *)(interface->data);
+  /* This assumes that GPU_USAGE_DEVICE_ONLY was used, which won't allocate host memory. The
+   * assumption is not enforced, as the assertion below is disabled. */
+  // BLI_assert(GPU_vertbuf_get_usage(verts) == GPU_USAGE_DEVICE_ONLY);
+  GPU_vertbuf_data_alloc(vbo, len);
+}
+
+/* #OpenSubdiv_Buffer callback: forward `handle` to #GPU_vertbuf_wrap_handle for the wrapped
+ * #GPUVertBuf. */
+static void vertbuf_wrap_device_handle(const OpenSubdiv_Buffer *interface, uint64_t handle)
+{
+  GPU_vertbuf_wrap_handle((GPUVertBuf *)(interface->data), handle);
+}
+
+/* #OpenSubdiv_Buffer callback: forward `start`, `len`, and `data` to #GPU_vertbuf_update_sub for
+ * the wrapped #GPUVertBuf. */
+static void vertbuf_update_data(const OpenSubdiv_Buffer *interface,
+                                uint start,
+                                uint len,
+                                const void *data)
+{
+  GPU_vertbuf_update_sub((GPUVertBuf *)(interface->data), start, len, data);
+}
+
+/* Set up `buffer_interface` so that it operates on `vertbuf`: the vertex buffer is stored as the
+ * interface's data, the buffer offset is reset to zero, and the callbacks are pointed to the
+ * `vertbuf_*` functions of this file. */
+static void opensubdiv_gpu_buffer_init(OpenSubdiv_Buffer *buffer_interface, GPUVertBuf *vertbuf)
+{
+  /* Wrapped buffer. */
+  buffer_interface->data = vertbuf;
+  buffer_interface->buffer_offset = 0;
+
+  /* Allocation callbacks. */
+  buffer_interface->alloc = vertbuf_alloc;
+  buffer_interface->device_alloc = vertbuf_device_alloc;
+
+  /* Binding, wrapping, and update callbacks. */
+  buffer_interface->bind_gpu = vertbuf_bind_gpu;
+  buffer_interface->wrap_device_handle = vertbuf_wrap_device_handle;
+  buffer_interface->device_update = vertbuf_update_data;
+}
+
+/* Create a new #GPUVertBuf using `format` with the GPU_USAGE_DEVICE_ONLY usage, initialize
+ * `interface` so that it wraps this buffer, and return the buffer. No data is allocated here. */
+static GPUVertBuf *create_buffer_and_interface(OpenSubdiv_Buffer *interface, GPUVertFormat *format)
+{
+  GPUVertBuf *vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format_ex(vbo, format, GPU_USAGE_DEVICE_ONLY);
+
+  opensubdiv_gpu_buffer_init(interface, vbo);
+
+  return vbo;
+}
+
+/** \} */
+
+// --------------------------------------------------------
+
+/* Number of triangles for the given number of loops: every group of four loops (a quad) counts
+ * for two triangles. The division is an integer division, remaining loops are ignored. */
+static uint tris_count_from_number_of_loops(const uint number_of_loops)
+{
+  const uint32_t quads_len = number_of_loops / 4;
+  const uint32_t tris_per_quad = 2;
+  return quads_len * tris_per_quad;
+}
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities to build a GPUVertBuf from an origindex buffer.
+ * \{ */
+
+/* Initialize `buffer` with the origindex format and a static usage, allocate room for
+ * `num_loops + loose_len` elements, and copy the first `num_loops` integers of `vert_origindex`
+ * into it. The trailing `loose_len` elements are not written by this function. */
+void draw_subdiv_init_origindex_buffer(GPUVertBuf *buffer,
+                                       int *vert_origindex,
+                                       uint num_loops,
+                                       uint loose_len)
+{
+  const uint total_len = num_loops + loose_len;
+  GPU_vertbuf_init_with_format_ex(buffer, get_origindex_format(), GPU_USAGE_STATIC);
+  GPU_vertbuf_data_alloc(buffer, total_len);
+
+  int *dst = (int *)GPU_vertbuf_get_data(buffer);
+  memcpy(dst, vert_origindex, sizeof(int) * num_loops);
+}
+
+/* Create a new #GPUVertBuf holding a copy of the first `num_loops` integers of `vert_origindex`,
+ * without any extra room for loose elements. The caller receives the newly created buffer. */
+GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops)
+{
+  const uint loose_len = 0;
+  GPUVertBuf *vbo = GPU_vertbuf_calloc();
+  draw_subdiv_init_origindex_buffer(vbo, vert_origindex, num_loops, loose_len);
+  return vbo;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities for DRWPatchMap.
+ * \{ */
+
+/* Fill `gpu_patch_map` from the evaluator of `subdiv`: two vertex buffers (patch handles and
+ * quad-tree nodes) are created and handed to the evaluator's `getPatchMap` through
+ * #OpenSubdiv_Buffer interfaces, together with output parameters for the patch map settings.
+ * The buffers and settings are then stored in `gpu_patch_map`. */
+static void draw_patch_map_build(DRWPatchMap *gpu_patch_map, Subdiv *subdiv)
+{
+  /* Buffer and interface for the patch handles. */
+  GPUVertBuf *handles_vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format_ex(handles_vbo, get_patch_handle_format(), GPU_USAGE_STATIC);
+  OpenSubdiv_Buffer handles_interface;
+  opensubdiv_gpu_buffer_init(&handles_interface, handles_vbo);
+
+  /* Buffer and interface for the quad-tree nodes. */
+  GPUVertBuf *quadtree_vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format_ex(quadtree_vbo, get_quadtree_format(), GPU_USAGE_STATIC);
+  OpenSubdiv_Buffer quadtree_interface;
+  opensubdiv_gpu_buffer_init(&quadtree_interface, quadtree_vbo);
+
+  /* Settings default to zero in case the evaluator does not write them. */
+  int min_face = 0;
+  int max_face = 0;
+  int depth = 0;
+  int triangular = 0;
+
+  OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+  evaluator->getPatchMap(evaluator,
+                         &handles_interface,
+                         &quadtree_interface,
+                         &min_face,
+                         &max_face,
+                         &depth,
+                         &triangular);
+
+  gpu_patch_map->patch_map_handles = handles_vbo;
+  gpu_patch_map->patch_map_quadtree = quadtree_vbo;
+  gpu_patch_map->min_patch_face = min_face;
+  gpu_patch_map->max_patch_face = max_face;
+  gpu_patch_map->max_depth = depth;
+  gpu_patch_map->patches_are_triangular = triangular;
+}
+
+/* Discard the vertex buffers of `gpu_patch_map`, and reset its settings to zero. The structure
+ * itself is not freed. */
+static void draw_patch_map_free(DRWPatchMap *gpu_patch_map)
+{
+  /* Settings. */
+  gpu_patch_map->patches_are_triangular = 0;
+  gpu_patch_map->max_depth = 0;
+  gpu_patch_map->max_patch_face = 0;
+  gpu_patch_map->min_patch_face = 0;
+
+  /* GPU buffers. */
+  GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_quadtree);
+  GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_handles);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivCache
+ * \{ */
+
+/* Free the data of `cache` which relates to materials: the `mat_start` and `mat_end` arrays, and
+ * the `polygon_mat_offset` vertex buffer. */
+static void draw_subdiv_cache_free_material_data(DRWSubdivCache *cache)
+{
+  MEM_SAFE_FREE(cache->mat_end);
+  MEM_SAFE_FREE(cache->mat_start);
+  GPU_VERTBUF_DISCARD_SAFE(cache->polygon_mat_offset);
+}
+
+/* Discard the vertex buffers of `cache` holding the original indices of the vertices and edges,
+ * as well as the patch coordinates of the face dots. */
+static void draw_subdiv_free_edit_mode_cache(DRWSubdivCache *cache)
+{
+  GPU_VERTBUF_DISCARD_SAFE(cache->fdots_patch_coords);
+  GPU_VERTBUF_DISCARD_SAFE(cache->edges_orig_index);
+  GPU_VERTBUF_DISCARD_SAFE(cache->verts_orig_index);
+}
+
+/* Release everything owned by `cache` (GPU buffers, host arrays, the patch map, and the UBO), and
+ * reset the resolution and element counts to zero. The cache structure itself is not freed, and
+ * can be built again afterwards. */
+void draw_subdiv_cache_free(DRWSubdivCache *cache)
+{
+  /* GPU buffers. */
+  GPU_VERTBUF_DISCARD_SAFE(cache->patch_coords);
+  GPU_VERTBUF_DISCARD_SAFE(cache->face_ptex_offset_buffer);
+  GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_polygon_offset_buffer);
+  GPU_VERTBUF_DISCARD_SAFE(cache->extra_coarse_face_data);
+  GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency_offsets);
+  GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency);
+
+  /* Host side arrays. */
+  MEM_SAFE_FREE(cache->subdiv_loop_subdiv_vert_index);
+  MEM_SAFE_FREE(cache->subdiv_loop_poly_index);
+  MEM_SAFE_FREE(cache->point_indices);
+  MEM_SAFE_FREE(cache->subdiv_polygon_offset);
+
+  /* Resolution and topology counters. */
+  cache->resolution = 0;
+  cache->num_subdiv_loops = 0;
+  cache->num_coarse_poly = 0;
+  cache->num_subdiv_quads = 0;
+
+  /* Data with a dedicated freeing function. */
+  draw_subdiv_free_edit_mode_cache(cache);
+  draw_subdiv_cache_free_material_data(cache);
+  draw_patch_map_free(&cache->gpu_patch_map);
+
+  if (cache->ubo != nullptr) {
+    GPU_uniformbuf_free(cache->ubo);
+    cache->ubo = nullptr;
+  }
+}
+
+/* Flags used in #DRWSubdivCache.extra_coarse_face_data. The flags are packed in the upper bits of
+ * each uint (one per coarse face), #SUBDIV_COARSE_FACE_FLAG_OFFSET tells where they are in the
+ * packed bits. */
+/* Flag values before they are shifted, one bit each. */
+#define SUBDIV_COARSE_FACE_FLAG_SMOOTH 1u
+#define SUBDIV_COARSE_FACE_FLAG_SELECT 2u
+#define SUBDIV_COARSE_FACE_FLAG_ACTIVE 4u
+
+/* Position of the first flag bit; with three flags, this places them in bits 29, 30, and 31. */
+#define SUBDIV_COARSE_FACE_FLAG_OFFSET 29u
+
+/* Flags shifted to their final position in the packed value. */
+#define SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK \
+  (SUBDIV_COARSE_FACE_FLAG_SMOOTH << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+#define SUBDIV_COARSE_FACE_FLAG_SELECT_MASK \
+  (SUBDIV_COARSE_FACE_FLAG_SELECT << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+#define SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK \
+  (SUBDIV_COARSE_FACE_FLAG_ACTIVE << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+
+/* Complement of the three flag bits, i.e. the lower 29 bits, which hold the index of the first
+ * loop of the coarse face. */
+#define SUBDIV_COARSE_FACE_LOOP_START_MASK \
+  ~((SUBDIV_COARSE_FACE_FLAG_SMOOTH | SUBDIV_COARSE_FACE_FLAG_SELECT | \
+     SUBDIV_COARSE_FACE_FLAG_ACTIVE) \
+    << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+
+/* Fill #DRWSubdivCache.extra_coarse_face_data with one packed uint per coarse face: the index of
+ * the first loop of the face in the lower bits, and the `SUBDIV_COARSE_FACE_FLAG_*` flags in the
+ * upper bits. The buffer is created on the first call, with `mesh->totpoly` elements, and is
+ * tagged as dirty after every update. The data is read from `cache->bm` when it is set, and from
+ * the polygons of `mesh` otherwise. */
+static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cache, Mesh *mesh)
+{
+  if (cache->extra_coarse_face_data == nullptr) {
+    cache->extra_coarse_face_data = GPU_vertbuf_calloc();
+    static GPUVertFormat format;
+    if (format.attr_len == 0) {
+      GPU_vertformat_attr_add(&format, "data", GPU_COMP_U32, 1, GPU_FETCH_INT);
+    }
+    GPU_vertbuf_init_with_format_ex(cache->extra_coarse_face_data, &format, GPU_USAGE_DYNAMIC);
+    GPU_vertbuf_data_alloc(cache->extra_coarse_face_data, mesh->totpoly);
+  }
+
+  uint32_t *flags_data = (uint32_t *)(GPU_vertbuf_get_data(cache->extra_coarse_face_data));
+
+  if (cache->bm) {
+    /* Edit-mode: smooth, selection, and active flags are read from the BMesh faces.
+     * NOTE(review): this relies on the element indices of the faces and loops being valid, and on
+     * the BMesh having no more faces than `mesh->totpoly` - confirm with the callers. */
+    BMesh *bm = cache->bm;
+    BMFace *f;
+    BMIter iter;
+
+    /* Pack the flags and the index of the first loop for every face of the BMesh. */
+    BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) {
+      const int index = BM_elem_index_get(f);
+      uint32_t flag = 0;
+      if (BM_elem_flag_test(f, BM_ELEM_SMOOTH)) {
+        flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH;
+      }
+      if (BM_elem_flag_test(f, BM_ELEM_SELECT)) {
+        flag |= SUBDIV_COARSE_FACE_FLAG_SELECT;
+      }
+      if (f == bm->act_face) {
+        flag |= SUBDIV_COARSE_FACE_FLAG_ACTIVE;
+      }
+      const int loopstart = BM_elem_index_get(f->l_first);
+      flags_data[index] = (uint)(loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET);
+    }
+  }
+  else {
+    /* Without a BMesh, only the smooth flag is set, based on the flag of the polygons. */
+    for (int i = 0; i < mesh->totpoly; i++) {
+      uint32_t flag = 0;
+      if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) {
+        flag = SUBDIV_COARSE_FACE_FLAG_SMOOTH;
+      }
+      flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET);
+    }
+  }
+
+  /* Make sure updated data is re-uploaded. */
+  GPU_vertbuf_tag_dirty(cache->extra_coarse_face_data);
+}
+
+/* Return the #DRWSubdivCache of the batch cache, allocating a zero-initialized one and storing it
+ * in `mbc` if there is none yet. */
+static DRWSubdivCache *mesh_batch_cache_ensure_subdiv_cache(MeshBatchCache *mbc)
+{
+  if (mbc->subdiv_cache == nullptr) {
+    mbc->subdiv_cache = static_cast<DRWSubdivCache *>(
+        MEM_callocN(sizeof(DRWSubdivCache), "DRWSubdivCache"));
+  }
+  return mbc->subdiv_cache;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Subdivision grid traversal.
+ *
+ * Traverse the uniform subdivision grid over coarse faces and gather useful information for
+ * building the draw buffers on the GPU. We primarily gather the patch coordinates for all
+ * subdivision faces, as well as the original coarse indices for each subdivision element (vertex,
+ * face, or edge) which directly maps to its coarse counterpart (note that all subdivision faces
+ * map to a coarse face). This information will then be cached in #DRWSubdivCache for subsequent
+ * reevaluations, as long as the topology does not change.
+ * \{ */
+
+/* User data given to the subdivision traversal callbacks. `coarse_mesh`, `settings`, and `cache`
+ * are set by the code starting the traversal, every other member is initialized by the topology
+ * information callback. */
+typedef struct DRWCacheBuildingContext {
+  /* Mesh and settings which are handed to the traversal. */
+  const Mesh *coarse_mesh;
+  const SubdivToMeshSettings *settings;
+
+  /* Cache being filled. */
+  DRWSubdivCache *cache;
+
+  /* Pointers into DRWSubdivCache buffers for easier access during traversal. */
+  /* Data of #DRWSubdivCache.patch_coords, one entry per subdivision loop. */
+  CompressedPatchCoord *patch_coords;
+  /* Data of #DRWSubdivCache.verts_orig_index, one entry per subdivision loop. */
+  int *subdiv_loop_vert_index;
+  int *subdiv_loop_subdiv_vert_index;
+  /* Data of #DRWSubdivCache.edges_orig_index, one entry per subdivision loop. */
+  int *subdiv_loop_edge_index;
+  int *subdiv_loop_poly_index;
+  int *point_indices;
+
+  /* Temporary buffers used during traversal. */
+  /* Coarse vertex index for each subdivision vertex, -1 if the callbacks did not set any. */
+  int *vert_origindex_map;
+  /* Coarse edge index for each subdivision edge, -1 if the callbacks did not set any. */
+  int *edge_origindex_map;
+
+  /* Origindex layers from the mesh to directly look up during traversal the origindex from the
+   * base mesh for edit data so that we do not have to handle yet another GPU buffer and do this in
+   * the shaders. Those are the CD_ORIGINDEX layers of the vertices and edges of the coarse mesh,
+   * the callbacks handle them being null. */
+  int *v_origindex;
+  int *e_origindex;
+} DRWCacheBuildingContext;
+
+/* Create a vertex buffer using `format` with a dynamic usage, and allocate `len` elements. */
+static GPUVertBuf *draw_subdiv_topology_create_dynamic_vbo(GPUVertFormat *format, const uint len)
+{
+  GPUVertBuf *vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format_ex(vbo, format, GPU_USAGE_DYNAMIC);
+  GPU_vertbuf_data_alloc(vbo, len);
+  return vbo;
+}
+
+/* Topology information callback of the subdivision traversal. Stores the element counts and a
+ * copy of `subdiv_polygon_offset` in the cache, allocates the cache buffers which are filled by
+ * the other callbacks, and initializes the pointers and temporary maps of the
+ * #DRWCacheBuildingContext. The vertex and edge maps, as well as the point indices, are filled
+ * with -1.
+ *
+ * Returns false, without touching the cache or the context, if there are no loops. */
+static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context,
+                                         const int num_vertices,
+                                         const int num_edges,
+                                         const int num_loops,
+                                         const int num_polygons,
+                                         const int *subdiv_polygon_offset)
+{
+  if (num_loops == 0) {
+    return false;
+  }
+
+  DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+  DRWSubdivCache *cache = ctx->cache;
+
+  /* Topology counts and polygon offsets. */
+  cache->num_subdiv_verts = (uint)num_vertices;
+  cache->num_subdiv_edges = (uint)num_edges;
+  cache->num_subdiv_loops = (uint)num_loops;
+  cache->num_subdiv_quads = (uint)num_polygons;
+  cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset));
+
+  /* GPU buffers, one element per subdivision loop. A dynamic usage is preferred so that the
+   * memory on the host can be reused even after it was sent to the device, since the data may be
+   * used while building other buffers on the CPU side. */
+  cache->patch_coords = draw_subdiv_topology_create_dynamic_vbo(get_blender_patch_coords_format(),
+                                                                cache->num_subdiv_loops);
+  cache->verts_orig_index = draw_subdiv_topology_create_dynamic_vbo(get_origindex_format(),
+                                                                    cache->num_subdiv_loops);
+  cache->edges_orig_index = draw_subdiv_topology_create_dynamic_vbo(get_origindex_format(),
+                                                                    cache->num_subdiv_loops);
+
+  /* Host side arrays which are kept in the cache. */
+  const size_t loops_size = cache->num_subdiv_loops * sizeof(int);
+  cache->subdiv_loop_subdiv_vert_index = static_cast<int *>(
+      MEM_mallocN(loops_size, "subdiv_loop_subdiv_vert_index"));
+  cache->subdiv_loop_poly_index = static_cast<int *>(
+      MEM_mallocN(loops_size, "subdiv_loop_poly_index"));
+
+  const size_t verts_size = cache->num_subdiv_verts * sizeof(int);
+  cache->point_indices = static_cast<int *>(MEM_mallocN(verts_size, "point_indices"));
+  for (int vert = 0; vert < num_vertices; vert++) {
+    cache->point_indices[vert] = -1;
+  }
+
+  /* Shortcuts to the buffers for the other callbacks. */
+  ctx->patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(cache->patch_coords);
+  ctx->subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(cache->verts_orig_index);
+  ctx->subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(cache->edges_orig_index);
+  ctx->subdiv_loop_subdiv_vert_index = cache->subdiv_loop_subdiv_vert_index;
+  ctx->subdiv_loop_poly_index = cache->subdiv_loop_poly_index;
+  ctx->point_indices = cache->point_indices;
+
+  /* Origindex layers of the coarse mesh. */
+  const Mesh *coarse_mesh = ctx->coarse_mesh;
+  ctx->v_origindex = static_cast<int *>(CustomData_get_layer(&coarse_mesh->vdata, CD_ORIGINDEX));
+  ctx->e_origindex = static_cast<int *>(CustomData_get_layer(&coarse_mesh->edata, CD_ORIGINDEX));
+
+  /* Temporary maps, -1 stands for the lack of a coarse element. */
+  ctx->vert_origindex_map = static_cast<int *>(
+      MEM_mallocN(verts_size, "subdiv_vert_origindex_map"));
+  for (int vert = 0; vert < num_vertices; vert++) {
+    ctx->vert_origindex_map[vert] = -1;
+  }
+
+  ctx->edge_origindex_map = static_cast<int *>(
+      MEM_mallocN(cache->num_subdiv_edges * sizeof(int), "subdiv_edge_origindex_map"));
+  for (int edge = 0; edge < num_edges; edge++) {
+    ctx->edge_origindex_map[edge] = -1;
+  }
+
+  return true;
+}
+
+/* Traversal callback for the subdivision vertices which are at the corners of the coarse faces:
+ * record the index of the coarse vertex in the temporary vertex origindex map. */
+static void draw_subdiv_vertex_corner_cb(const SubdivForeachContext *foreach_context,
+                                         void *UNUSED(tls),
+                                         const int UNUSED(ptex_face_index),
+                                         const float UNUSED(u),
+                                         const float UNUSED(v),
+                                         const int coarse_vertex_index,
+                                         const int UNUSED(coarse_poly_index),
+                                         const int UNUSED(coarse_corner),
+                                         const int subdiv_vertex_index)
+{
+  BLI_assert(coarse_vertex_index != ORIGINDEX_NONE);
+
+  DRWCacheBuildingContext *building_context = (DRWCacheBuildingContext *)(
+      foreach_context->user_data);
+  int *vert_map = building_context->vert_origindex_map;
+  vert_map[subdiv_vertex_index] = coarse_vertex_index;
+}
+
+/* Traversal callback for the subdivision vertices created along the coarse edges. It
+ * intentionally does nothing, and every parameter is unused. */
+static void draw_subdiv_vertex_edge_cb(const SubdivForeachContext *UNUSED(foreach_context),
+                                       void *UNUSED(tls_v),
+                                       const int UNUSED(ptex_face_index),
+                                       const float UNUSED(u),
+                                       const float UNUSED(v),
+                                       const int UNUSED(coarse_edge_index),
+                                       const int UNUSED(coarse_poly_index),
+                                       const int UNUSED(coarse_corner),
+                                       const int UNUSED(subdiv_vertex_index))
+{
+  /* Required if SubdivForeachContext.vertex_corner is also set. */
+}
+
+/* Traversal callback for the subdivision edges: record in the temporary edge origindex map the
+ * index of the coarse edge, remapped through the edge origindex layer of the coarse mesh when
+ * there is one. An index of -1 is stored as is. */
+static void draw_subdiv_edge_cb(const SubdivForeachContext *foreach_context,
+                                void *UNUSED(tls),
+                                const int coarse_edge_index,
+                                const int subdiv_edge_index,
+                                const int UNUSED(subdiv_v1),
+                                const int UNUSED(subdiv_v2))
+{
+  DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+
+  int origindex = coarse_edge_index;
+  if (origindex != -1 && ctx->e_origindex != nullptr) {
+    origindex = ctx->e_origindex[origindex];
+  }
+
+  ctx->edge_origindex_map[subdiv_edge_index] = origindex;
+}
+
+/* Traversal callback for the subdivision loops. For the given loop, this stores the compressed
+ * patch coordinate, the indices of the subdivision vertex and of the coarse polygon, and the
+ * original index of the vertex (-1 if there is none). The index of the subdivision edge is stored
+ * as well, to be remapped once the traversal is over. If the vertex has an original index, the
+ * loop is also registered in the point indices for this original vertex. */
+static void draw_subdiv_loop_cb(const SubdivForeachContext *foreach_context,
+                                void *UNUSED(tls_v),
+                                const int ptex_face_index,
+                                const float u,
+                                const float v,
+                                const int UNUSED(coarse_loop_index),
+                                const int coarse_poly_index,
+                                const int UNUSED(coarse_corner),
+                                const int subdiv_loop_index,
+                                const int subdiv_vertex_index,
+                                const int subdiv_edge_index)
+{
+  DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+  ctx->patch_coords[subdiv_loop_index] = make_patch_coord(ptex_face_index, u, v);
+
+  /* Resolve the original vertex, the origindex layer is only looked up for valid indices. */
+  int orig_vert = ctx->vert_origindex_map[subdiv_vertex_index];
+  if (orig_vert != -1 && ctx->v_origindex != nullptr) {
+    orig_vert = ctx->v_origindex[orig_vert];
+  }
+
+  /* The index is checked once more, as the origindex layer may have turned it into -1. */
+  if (orig_vert != -1) {
+    ctx->point_indices[orig_vert] = subdiv_loop_index;
+  }
+
+  ctx->subdiv_loop_vert_index[subdiv_loop_index] = orig_vert;
+  ctx->subdiv_loop_poly_index[subdiv_loop_index] = coarse_poly_index;
+  ctx->subdiv_loop_subdiv_vert_index[subdiv_loop_index] = subdiv_vertex_index;
+  /* This is the index of the subdivision edge for now, it is replaced by the index of the coarse
+   * edge at the end of the traversal, as some edges are only traversed by then. */
+  ctx->subdiv_loop_edge_index[subdiv_loop_index] = subdiv_edge_index;
+}
+
+/* Reset `foreach_context` to zero, and set the callbacks used to build the draw cache. The user
+ * data is left for the caller to set. */
+static void draw_subdiv_foreach_callbacks(SubdivForeachContext *foreach_context)
+{
+  memset(foreach_context, 0, sizeof(*foreach_context));
+
+  foreach_context->topology_info = draw_subdiv_topology_info_cb;
+
+  /* Per vertex callbacks. */
+  foreach_context->vertex_corner = draw_subdiv_vertex_corner_cb;
+  foreach_context->vertex_edge = draw_subdiv_vertex_edge_cb;
+
+  /* Per edge and per loop callbacks. */
+  foreach_context->edge = draw_subdiv_edge_cb;
+  foreach_context->loop = draw_subdiv_loop_cb;
+}
+
+/* Run the subdivision traversal with the callbacks of this file, using `cache_building_context`
+ * as user data. Afterwards, the loop-to-edge map, which holds indices of subdivision edges right
+ * after the traversal, is remapped through the edge origindex map. */
+static void do_subdiv_traversal(DRWCacheBuildingContext *cache_building_context, Subdiv *subdiv)
+{
+  DRWCacheBuildingContext *ctx = cache_building_context;
+
+  SubdivForeachContext foreach_context;
+  draw_subdiv_foreach_callbacks(&foreach_context);
+  foreach_context.user_data = ctx;
+
+  BKE_subdiv_foreach_subdiv_geometry(subdiv, &foreach_context, ctx->settings, ctx->coarse_mesh);
+
+  /* The remapping can only be done now, as the edge callback fills the map while traversing. The
+   * context pointers are only read inside of the loop, which does not run if no loop was found. */
+  for (int i = 0; i < ctx->cache->num_subdiv_loops; i++) {
+    int *edge_index = &ctx->subdiv_loop_edge_index[i];
+    *edge_index = ctx->edge_origindex_map[*edge_index];
+  }
+}
+
+/* Create a new #GPUVertBuf using `format`, allocate `len` elements for it, and return it. */
+static GPUVertBuf *gpu_vertbuf_create_from_format(GPUVertFormat *format, uint len)
+{
+  GPUVertBuf *vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format(vbo, format);
+  GPU_vertbuf_data_alloc(vbo, len);
+
+  return vbo;
+}
+
+/* Build maps to hold enough information to tell which face is adjacent to which vertex; those will
+ * be used for computing normals if limit surfaces are unavailable.
+ *
+ * Two vertex buffers are created on the cache:
+ * - `subdiv_vertex_face_adjacency_offsets`: for each subdivision vertex, the index of its first
+ *   entry in the adjacency buffer. It has `num_subdiv_verts + 1` entries, the last one being the
+ *   total number of entries in the adjacency buffer.
+ * - `subdiv_vertex_face_adjacency`: one entry per subdivision loop, grouped by vertex. Each entry
+ *   is the index of a face using the vertex, where faces are taken as consecutive groups of four
+ *   loops.
+ *
+ * This reads #DRWSubdivCache.subdiv_loop_subdiv_vert_index, which thus has to be filled. */
+static void build_vertex_face_adjacency_maps(DRWSubdivCache *cache)
+{
+  /* +1 so that we do not require a special case for the last vertex, this extra offset will
+   * contain the total number of adjacent faces. */
+  const uint offsets_len = cache->num_subdiv_verts + 1;
+  cache->subdiv_vertex_face_adjacency_offsets = gpu_vertbuf_create_from_format(
+      get_origindex_format(), offsets_len);
+
+  int *vertex_offsets = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency_offsets);
+  /* Clear every entry, including the extra one. The size has to be computed from the number of
+   * entries: `sizeof(int) * num_subdiv_verts + 1` would only clear a single byte of the last
+   * entry, which is read by the prefix sum below. */
+  memset(vertex_offsets, 0, sizeof(int) * offsets_len);
+
+  /* First pass: count how many loops, and therefore faces, use each vertex. */
+  for (int i = 0; i < cache->num_subdiv_loops; i++) {
+    vertex_offsets[cache->subdiv_loop_subdiv_vert_index[i]]++;
+  }
+
+  /* Second pass: turn the counts into offsets (exclusive prefix sum). */
+  int ofs = vertex_offsets[0];
+  vertex_offsets[0] = 0;
+  for (uint i = 1; i < offsets_len; i++) {
+    int tmp = vertex_offsets[i];
+    vertex_offsets[i] = ofs;
+    ofs += tmp;
+  }
+
+  cache->subdiv_vertex_face_adjacency = gpu_vertbuf_create_from_format(get_origindex_format(),
+                                                                       cache->num_subdiv_loops);
+  int *adjacent_faces = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency);
+  /* Number of faces already written for each vertex. */
+  int *tmp_set_faces = static_cast<int *>(
+      MEM_callocN(sizeof(int) * cache->num_subdiv_verts, "tmp subdiv vertex offset"));
+
+  /* Third pass: write the index of each face in the range of each of its four vertices. */
+  for (int i = 0; i < cache->num_subdiv_loops / 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      const int subdiv_vertex = cache->subdiv_loop_subdiv_vert_index[i * 4 + j];
+      int first_face_offset = vertex_offsets[subdiv_vertex] + tmp_set_faces[subdiv_vertex];
+      adjacent_faces[first_face_offset] = i;
+      tmp_set_faces[subdiv_vertex] += 1;
+    }
+  }
+
+  MEM_freeN(tmp_set_faces);
+}
+
+/* Build the data of `cache` for the given subdivision, unless it was already built for the same
+ * resolution. The resolution is derived from the subdivision level given by
+ * #get_render_subsurf_level. Any data cached for another resolution is freed first.
+ *
+ * Returns true if the cache holds valid data (either reused, or built by this call), false if the
+ * traversal did not produce any loop. */
+static bool draw_subdiv_build_cache(DRWSubdivCache *cache,
+                                    Subdiv *subdiv,
+                                    Mesh *mesh_eval,
+                                    const Scene *scene,
+                                    const SubsurfModifierData *smd,
+                                    const bool is_final_render)
+{
+  const int level = get_render_subsurf_level(&scene->r, smd->levels, is_final_render);
+  SubdivToMeshSettings to_mesh_settings;
+  to_mesh_settings.resolution = (1 << level) + 1;
+  to_mesh_settings.use_optimal_display = false;
+
+  if (cache->resolution != to_mesh_settings.resolution) {
+    /* Resolution changed, we need to rebuild, free any existing cached data. */
+    draw_subdiv_cache_free(cache);
+  }
+
+  /* If the resolution between the cache and the settings match for some reason, check if the patch
+   * coordinates were not already generated. Those coordinates are specific to the resolution, so
+   * they should be null either after initialization, or after freeing if the resolution (or some
+   * other subdivision setting) changed.
+   */
+  if (cache->patch_coords != nullptr) {
+    return true;
+  }
+
+  /* Only the members below are set here, the remaining ones are initialized by the topology
+   * information callback during the traversal. */
+  DRWCacheBuildingContext cache_building_context;
+  cache_building_context.coarse_mesh = mesh_eval;
+  cache_building_context.settings = &to_mesh_settings;
+  cache_building_context.cache = cache;
+
+  do_subdiv_traversal(&cache_building_context, subdiv);
+  if (cache->num_subdiv_loops == 0) {
+    /* Either the traversal failed, or we have an empty mesh, either way we cannot go any further.
+     * The subdiv_polygon_offset cannot then be reliably stored in the cache, so free it directly.
+     */
+    MEM_SAFE_FREE(cache->subdiv_polygon_offset);
+    return false;
+  }
+
+  /* Build buffers for the PatchMap. */
+  draw_patch_map_build(&cache->gpu_patch_map, subdiv);
+
+  cache->face_ptex_offset = BKE_subdiv_face_ptex_offset_get(subdiv);
+
+  /* Build patch coordinates for all the face dots, one per coarse polygon. */
+  cache->fdots_patch_coords = gpu_vertbuf_create_from_format(get_blender_patch_coords_format(),
+                                                             mesh_eval->totpoly);
+  CompressedPatchCoord *blender_fdots_patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(
+      cache->fdots_patch_coords);
+  for (int i = 0; i < mesh_eval->totpoly; i++) {
+    const int ptex_face_index = cache->face_ptex_offset[i];
+    if (mesh_eval->mpoly[i].totloop == 4) {
+      /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. */
+      blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f);
+    }
+    else {
+      /* For N-gons, since they are split into quads from the center, and since the center is
+       * chosen to be the top right corner of each quad, the center coordinate of the coarse face
+       * is any one of those top right corners with `u = v = 1.0`. */
+      blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 1.0f, 1.0f);
+    }
+  }
+
+  /* The resolution is only stored once the traversal succeeded. */
+  cache->resolution = to_mesh_settings.resolution;
+
+  cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer(
+      cache->subdiv_polygon_offset, mesh_eval->totpoly);
+
+  /* One more element than the number of polygons is copied from the ptex offsets. */
+  cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset,
+                                                                      mesh_eval->totpoly + 1);
+  cache->num_coarse_poly = mesh_eval->totpoly;
+  cache->point_indices = cache_building_context.point_indices;
+
+  build_vertex_face_adjacency_maps(cache);
+
+  /* Cleanup. */
+  MEM_freeN(cache_building_context.vert_origindex_map);
+  MEM_freeN(cache_building_context.edge_origindex_map);
+
+  return true;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivUboStorage.
+ *
+ * Common uniforms for the various shaders.
+ * \{ */
+
+/* Data for the uniform buffer shared by the subdivision compute shaders. Every member is 4 bytes
+ * wide, and the total size has to be a multiple of 16 bytes, as verified by the static assertion
+ * following the structure. */
+typedef struct DRWSubdivUboStorage {
+  /* Offsets in the buffers data where the source and destination data start. */
+  int src_offset;
+  int dst_offset;
+
+  /* Parameters for the DRWPatchMap. */
+  int min_patch_face;
+  int max_patch_face;
+  int max_depth;
+  int patches_are_triangular;
+
+  /* Coarse topology information. */
+  int coarse_poly_count;
+  uint edge_loose_offset;
+
+  /* Refined topology information. */
+  uint num_subdiv_loops;
+
+  /* Subdivision settings, is int in C but bool in the GLSL code, as there, bools have the same
+   * size as ints, so we should use int in C to ensure that the size of the structure is what GLSL
+   * expects. */
+  int optimal_display;
+
+  /* The sculpt mask data layer may be null. */
+  int has_sculpt_mask;
+
+  /* Masks for the extra coarse face data. */
+  uint coarse_face_select_mask;
+  uint coarse_face_smooth_mask;
+  uint coarse_face_active_mask;
+  uint coarse_face_loopstart_mask;
+
+  /* Number of elements to process in the compute shader (can be the coarse quad count, or the
+   * final vertex count, depending on which compute pass we do). This is used to early out in case
+   * of out-of-bounds accesses, as compute dispatches are of fixed size. */
+  uint total_dispatch_size;
+} DRWSubdivUboStorage;
+
+static_assert((sizeof(DRWSubdivUboStorage) % 16) == 0,
+              "DRWSubdivUboStorage is not padded to a multiple of the size of vec4");
+
+/* Fill every member of `ubo` from the cache, the given parameters, and the masks for the extra
+ * coarse face data. */
+static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache,
+                                         DRWSubdivUboStorage *ubo,
+                                         const int src_offset,
+                                         const int dst_offset,
+                                         const uint total_dispatch_size,
+                                         const bool has_sculpt_mask)
+{
+  /* Values given by the caller. */
+  ubo->src_offset = src_offset;
+  ubo->dst_offset = dst_offset;
+  ubo->total_dispatch_size = total_dispatch_size;
+  ubo->has_sculpt_mask = has_sculpt_mask;
+
+  /* Patch map parameters. */
+  const DRWPatchMap *patch_map = &cache->gpu_patch_map;
+  ubo->min_patch_face = patch_map->min_patch_face;
+  ubo->max_patch_face = patch_map->max_patch_face;
+  ubo->max_depth = patch_map->max_depth;
+  ubo->patches_are_triangular = patch_map->patches_are_triangular;
+
+  /* Topology and settings from the cache. The offset for the loose edges is set to twice the
+   * number of subdivision loops. */
+  ubo->coarse_poly_count = cache->num_coarse_poly;
+  ubo->num_subdiv_loops = cache->num_subdiv_loops;
+  ubo->edge_loose_offset = cache->num_subdiv_loops * 2;
+  ubo->optimal_display = cache->optimal_display;
+
+  /* Masks for the extra coarse face data. */
+  ubo->coarse_face_select_mask = SUBDIV_COARSE_FACE_FLAG_SELECT_MASK;
+  ubo->coarse_face_smooth_mask = SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK;
+  ubo->coarse_face_active_mask = SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK;
+  ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK;
+}
+
+/**
+ * Build the UBO contents for the next dispatch, upload them to `cache->ubo` and bind that buffer
+ * to the "shader_data" uniform block of \a shader.
+ *
+ * The GPU buffer is created the first time this is called for a given cache, which is why the
+ * `const` qualifier of \a cache is cast away below.
+ */
+static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache,
+                                            GPUShader *shader,
+                                            const int src_offset,
+                                            const int dst_offset,
+                                            const uint total_dispatch_size,
+                                            const bool has_sculpt_mask = false)
+{
+  DRWSubdivUboStorage storage;
+  draw_subdiv_init_ubo_storage(
+      cache, &storage, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask);
+
+  /* Lazily create the uniform buffer, it is then reused by the following dispatches. */
+  if (!cache->ubo) {
+    const_cast<DRWSubdivCache *>(cache)->ubo = GPU_uniformbuf_create_ex(
+        sizeof(DRWSubdivUboStorage), &storage, "DRWSubdivUboStorage");
+  }
+
+  /* NOTE(review): on the creation path the data was already passed to the create call, so this
+   * upload is presumably redundant for the first dispatch only. */
+  GPU_uniformbuf_update(cache->ubo, &storage);
+
+  const int location = GPU_shader_get_uniform_block(shader, "shader_data");
+  GPU_uniformbuf_bind(cache->ubo, location);
+}
+
+/** \} */
+
+// --------------------------------------------------------
+
+/* Number of elements handled by a single work group.
+ * NOTE(review): presumably has to match the local size declared in the compute shaders. */
+#define SUBDIV_LOCAL_WORK_GROUP_SIZE 64
+
+/** Return how many work groups are needed to cover \a elements, rounding up. */
+static uint get_dispatch_size(uint elements)
+{
+  const uint work_group_count = divide_ceil_u(elements, SUBDIV_LOCAL_WORK_GROUP_SIZE);
+  return work_group_count;
+}
+
+/* Helper to ensure that the UBO is always initialized before dispatching computes and that the
+ * same number of elements that need to be processed is used for the UBO and the dispatch size.
+ * Use this instead of a raw call to #GPU_compute_dispatch.
+ *
+ * `total_dispatch_size` is the number of elements to process, not the number of work groups: the
+ * work group count is derived from it through #get_dispatch_size. */
+static void drw_subdiv_compute_dispatch(const DRWSubdivCache *cache,
+                                        GPUShader *shader,
+                                        const int src_offset,
+                                        const int dst_offset,
+                                        uint total_dispatch_size,
+                                        const bool has_sculpt_mask = false)
+{
+  const uint max_res_x = static_cast<uint>(GPU_max_work_group_count(0));
+
+  const uint dispatch_size = get_dispatch_size(total_dispatch_size);
+  uint dispatch_rx = dispatch_size;
+  uint dispatch_ry = 1u;
+  if (dispatch_rx > max_res_x) {
+    /* Since there are some limitations with regards to the maximum work group size (could be as
+     * low as 64k elements per call), we split the number elements into a "2d" number, with the
+     * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum
+     * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements
+     * total, which should be enough. If not, we could also use the 3rd dimension. */
+    /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and
+     * get the smallest rect fitting the requirements. */
+    dispatch_rx = dispatch_ry = ceilf(sqrtf(dispatch_size));
+    /* Avoid a completely empty dispatch line caused by rounding. */
+    if ((dispatch_rx * (dispatch_ry - 1)) >= dispatch_size) {
+      dispatch_ry -= 1;
+    }
+  }
+
+  /* X and Y dimensions may have different limits so the above computation may not be right, but
+   * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore,
+   * we presume it all fits. */
+  BLI_assert(dispatch_ry < static_cast<uint>(GPU_max_work_group_count(1)));
+
+  /* The UBO receives the element count, so that the shaders can early out for the invocations
+   * which are past the last element. */
+  draw_subdiv_ubo_update_and_bind(
+      cache, shader, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask);
+
+  GPU_compute_dispatch(shader, dispatch_rx, dispatch_ry, 1);
+}
+
+/**
+ * Run the patch evaluation compute shader and write its output to \a pos_nor.
+ *
+ * The evaluator's source, patch arrays, patch index and patch param data are exposed through
+ * temporary vertex buffers which are discarded before returning.
+ *
+ * \param do_limit_normals: Select the shader variant which also evaluates limit normals.
+ */
+void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache,
+                                 GPUVertBuf *pos_nor,
+                                 const bool do_limit_normals)
+{
+  Subdiv *subdiv = cache->subdiv;
+  OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+  /* Temporary buffers through which the evaluator's data is made available to the shader. */
+  OpenSubdiv_Buffer src_buffer_interface;
+  GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface,
+                                                       get_subdiv_vertex_format());
+  evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface);
+
+  OpenSubdiv_Buffer patch_arrays_buffer_interface;
+  GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+                                                                get_patch_array_format());
+  evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface);
+
+  OpenSubdiv_Buffer patch_index_buffer_interface;
+  GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+                                                               get_patch_index_format());
+  evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface);
+
+  OpenSubdiv_Buffer patch_param_buffer_interface;
+  GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+                                                               get_patch_param_format());
+  evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface);
+
+  GPUShader *shader = get_patch_evaluation_shader(
+      do_limit_normals ? SHADER_PATCH_EVALUATION_LIMIT_NORMALS : SHADER_PATCH_EVALUATION);
+  GPU_shader_bind(shader);
+
+  /* Binding points 0 to 7 are inputs, binding point 8 is the output. */
+  GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+  GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+  GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+  GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+  GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+  GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+  GPU_vertbuf_bind_as_ssbo(pos_nor, 8);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+   * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+   * needed. */
+  GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+
+  GPU_vertbuf_discard(patch_index_buffer);
+  GPU_vertbuf_discard(patch_param_buffer);
+  GPU_vertbuf_discard(patch_arrays_buffer);
+  GPU_vertbuf_discard(src_buffer);
+}
+
+/**
+ * Run the face-varying patch evaluation compute shader for one channel and write its output to
+ * \a uvs.
+ *
+ * \param face_varying_channel: Channel passed to the evaluator's face-varying buffer accessors.
+ * \param dst_offset: Forwarded to the shader through the UBO.
+ */
+void draw_subdiv_extract_uvs(const DRWSubdivCache *cache,
+                             GPUVertBuf *uvs,
+                             const int face_varying_channel,
+                             const int dst_offset)
+{
+  Subdiv *subdiv = cache->subdiv;
+  OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+  /* Temporary buffers through which the evaluator's data is made available to the shader. */
+  OpenSubdiv_Buffer src_buffer_interface;
+  GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, get_uvs_format());
+  evaluator->wrapFVarSrcBuffer(evaluator, face_varying_channel, &src_buffer_interface);
+
+  OpenSubdiv_Buffer patch_arrays_buffer_interface;
+  GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+                                                                get_patch_array_format());
+  evaluator->fillFVarPatchArraysBuffer(
+      evaluator, face_varying_channel, &patch_arrays_buffer_interface);
+
+  OpenSubdiv_Buffer patch_index_buffer_interface;
+  GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+                                                               get_patch_index_format());
+  evaluator->wrapFVarPatchIndexBuffer(
+      evaluator, face_varying_channel, &patch_index_buffer_interface);
+
+  OpenSubdiv_Buffer patch_param_buffer_interface;
+  GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+                                                               get_patch_param_format());
+  evaluator->wrapFVarPatchParamBuffer(
+      evaluator, face_varying_channel, &patch_param_buffer_interface);
+
+  GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FVAR);
+  GPU_shader_bind(shader);
+
+  /* Binding points 0 to 7 are inputs, binding point 8 is the output. */
+  GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+  GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+  GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+  GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+  GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+  GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+  GPU_vertbuf_bind_as_ssbo(uvs, 8);
+
+  /* The buffer offset has the stride baked in (which is 2 as we have UVs) so remove the stride by
+   * dividing by 2 */
+  const int src_offset = src_buffer_interface.buffer_offset / 2;
+  drw_subdiv_compute_dispatch(cache, shader, src_offset, dst_offset, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array.
+   * Since it may also be used for computing UV stretches, we also need a barrier on the shader
+   * storage. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_SHADER_STORAGE);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+
+  GPU_vertbuf_discard(patch_index_buffer);
+  GPU_vertbuf_discard(patch_param_buffer);
+  GPU_vertbuf_discard(patch_arrays_buffer);
+  GPU_vertbuf_discard(src_buffer);
+}
+
+/**
+ * Interpolate a custom data layer from \a src_data into \a dst_data with a compute shader.
+ *
+ * \param dimensions: Number of components per element, only 1 to 4 are supported. The 4D variant
+ * is additionally compiled with `GPU_FETCH_U16_TO_FLOAT`.
+ * \param dst_offset: Forwarded to the shader through the UBO.
+ *
+ * An unsupported \a dimensions value triggers an assert in debug builds and is otherwise a no-op
+ * (\a dst_data is left untouched) instead of binding a null shader.
+ */
+void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache,
+                                    GPUVertBuf *src_data,
+                                    GPUVertBuf *dst_data,
+                                    int dimensions,
+                                    int dst_offset)
+{
+  GPUShader *shader = nullptr;
+
+  switch (dimensions) {
+    case 1:
+      shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D,
+                                 "#define SUBDIV_POLYGON_OFFSET\n"
+                                 "#define DIMENSIONS 1\n");
+      break;
+    case 2:
+      shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_2D,
+                                 "#define SUBDIV_POLYGON_OFFSET\n"
+                                 "#define DIMENSIONS 2\n");
+      break;
+    case 3:
+      shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_3D,
+                                 "#define SUBDIV_POLYGON_OFFSET\n"
+                                 "#define DIMENSIONS 3\n");
+      break;
+    case 4:
+      shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_4D,
+                                 "#define SUBDIV_POLYGON_OFFSET\n"
+                                 "#define DIMENSIONS 4\n"
+                                 "#define GPU_FETCH_U16_TO_FLOAT\n");
+      break;
+    default:
+      /* Unsupported number of dimensions, handled below. */
+      break;
+  }
+
+  /* Never bind or dispatch a null shader: make the programming error visible in debug builds and
+   * skip the interpolation in release builds. */
+  BLI_assert(shader != nullptr);
+  if (shader == nullptr) {
+    return;
+  }
+
+  GPU_shader_bind(shader);
+
+  /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+  GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+  GPU_vertbuf_bind_as_ssbo(src_data, 1);
+  GPU_vertbuf_bind_as_ssbo(cache->face_ptex_offset_buffer, 2);
+  GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+  GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 4);
+  GPU_vertbuf_bind_as_ssbo(dst_data, 5);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, dst_offset, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the sculpt data compute shader, reading \a mask_vbo and \a face_set_vbo and writing to
+ * \a sculpt_data.
+ *
+ * \param mask_vbo: May be null, in which case nothing is bound at binding point 0 and the shader
+ * is told so through the `has_sculpt_mask` UBO member.
+ */
+void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache,
+                                          GPUVertBuf *mask_vbo,
+                                          GPUVertBuf *face_set_vbo,
+                                          GPUVertBuf *sculpt_data)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_SCULPT_DATA, nullptr);
+  GPU_shader_bind(shader);
+
+  if (mask_vbo) {
+    GPU_vertbuf_bind_as_ssbo(mask_vbo, 0);
+  }
+
+  GPU_vertbuf_bind_as_ssbo(face_set_vbo, 1);
+  GPU_vertbuf_bind_as_ssbo(sculpt_data, 2);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads, mask_vbo != nullptr);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the normals accumulation compute shader over `cache->num_subdiv_verts` elements.
+ *
+ * The buffers are bound at consecutive binding points, in parameter order: \a pos_nor (0),
+ * \a face_adjacency_offsets (1), \a face_adjacency_lists (2) and \a vertex_normals (3).
+ */
+void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache,
+                                    GPUVertBuf *pos_nor,
+                                    GPUVertBuf *face_adjacency_offsets,
+                                    GPUVertBuf *face_adjacency_lists,
+                                    GPUVertBuf *vertex_normals)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_ACCUMULATE, nullptr);
+  GPU_shader_bind(shader);
+
+  int binding_point = 0;
+
+  GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++);
+  GPU_vertbuf_bind_as_ssbo(face_adjacency_offsets, binding_point++);
+  GPU_vertbuf_bind_as_ssbo(face_adjacency_lists, binding_point++);
+  GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++);
+
+  /* Unlike most passes in this file, the dispatch size is the vertex count. */
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_verts);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+   * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+   * needed. */
+  GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the normals finalization compute shader over `cache->num_subdiv_quads` elements.
+ *
+ * The buffers are bound at consecutive binding points, in parameter order: \a vertex_normals (0),
+ * \a subdiv_loop_subdiv_vert_index (1) and \a pos_nor (2).
+ */
+void draw_subdiv_finalize_normals(const DRWSubdivCache *cache,
+                                  GPUVertBuf *vertex_normals,
+                                  GPUVertBuf *subdiv_loop_subdiv_vert_index,
+                                  GPUVertBuf *pos_nor)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_FINALIZE, nullptr);
+  GPU_shader_bind(shader);
+
+  int binding_point = 0;
+  GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++);
+  GPU_vertbuf_bind_as_ssbo(subdiv_loop_subdiv_vert_index, binding_point++);
+  GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+   * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+   * needed. */
+  GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Fill the triangle index buffer \a subdiv_tris with a compute shader.
+ *
+ * \param material_count: With more than one material, the shader variant using the per polygon
+ * material offsets is used, otherwise the `SINGLE_MATERIAL` variant is used and the offset
+ * buffers are not bound.
+ */
+void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache,
+                                   GPUIndexBuf *subdiv_tris,
+                                   const int material_count)
+{
+  const bool use_material_offsets = material_count > 1;
+
+  GPUShader *shader;
+  if (use_material_offsets) {
+    shader = get_subdiv_shader(SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS,
+                               "#define SUBDIV_POLYGON_OFFSET\n");
+  }
+  else {
+    shader = get_subdiv_shader(SHADER_BUFFER_TRIS,
+                               "#define SUBDIV_POLYGON_OFFSET\n"
+                               "#define SINGLE_MATERIAL\n");
+  }
+  GPU_shader_bind(shader);
+
+  /* Output buffer. */
+  GPU_indexbuf_bind_as_ssbo(subdiv_tris, 1);
+
+  /* Input buffers, only read by the multiple materials variant. */
+  if (use_material_offsets) {
+    GPU_vertbuf_bind_as_ssbo(cache->polygon_mat_offset, 2);
+    /* The polygon offsets are bound at binding point 0 by every shader which needs them. */
+    GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+  }
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* An index buffer was written, hence the barrier on the element array. */
+  GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the face dots patch evaluation compute shader over `cache->num_coarse_poly` elements,
+ * writing to \a fdots_pos, \a fdots_nor and \a fdots_indices.
+ *
+ * The evaluator's source, patch arrays, patch index and patch param data are exposed through
+ * temporary vertex buffers which are discarded before returning.
+ */
+void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache,
+                                     GPUVertBuf *fdots_pos,
+                                     GPUVertBuf *fdots_nor,
+                                     GPUIndexBuf *fdots_indices)
+{
+  Subdiv *subdiv = cache->subdiv;
+  OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+  OpenSubdiv_Buffer src_buffer_interface;
+  GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface,
+                                                       get_subdiv_vertex_format());
+  evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface);
+
+  OpenSubdiv_Buffer patch_arrays_buffer_interface;
+  GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+                                                                get_patch_array_format());
+  /* NOTE(review): the other patch evaluation functions of this file do not make this extra call
+   * after #create_buffer_and_interface, it is presumably redundant. To be confirmed. */
+  opensubdiv_gpu_buffer_init(&patch_arrays_buffer_interface, patch_arrays_buffer);
+  evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface);
+
+  OpenSubdiv_Buffer patch_index_buffer_interface;
+  GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+                                                               get_patch_index_format());
+  evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface);
+
+  OpenSubdiv_Buffer patch_param_buffer_interface;
+  GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+                                                               get_patch_param_format());
+  evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface);
+
+  GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FACE_DOTS);
+  GPU_shader_bind(shader);
+
+  /* Note that the patch coordinates bound here are the ones of the face dots. */
+  GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+  GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+  GPU_vertbuf_bind_as_ssbo(cache->fdots_patch_coords, 3);
+  GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+  GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+  GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+  GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+  GPU_vertbuf_bind_as_ssbo(fdots_pos, 8);
+  GPU_vertbuf_bind_as_ssbo(fdots_nor, 9);
+  GPU_indexbuf_bind_as_ssbo(fdots_indices, 10);
+  GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 11);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_coarse_poly);
+
+  /* This generates two vertex buffers and an index buffer, so we need to put a barrier on the
+   * vertex attributes and element arrays. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_ELEMENT_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+
+  GPU_vertbuf_discard(patch_index_buffer);
+  GPU_vertbuf_discard(patch_param_buffer);
+  GPU_vertbuf_discard(patch_arrays_buffer);
+  GPU_vertbuf_discard(src_buffer);
+}
+
+/**
+ * Fill the line index buffer \a lines_indices with a compute shader which reads
+ * `cache->edges_orig_index`, processing `cache->num_subdiv_quads` elements.
+ */
+void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, GPUIndexBuf *lines_indices)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES, nullptr);
+  GPU_shader_bind(shader);
+
+  GPU_vertbuf_bind_as_ssbo(cache->edges_orig_index, 0);
+  GPU_indexbuf_bind_as_ssbo(lines_indices, 1);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates an index buffer, so we need to put a barrier on the element array. */
+  GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the `LINES_LOOSE` variant of the lines compute shader on \a lines_indices.
+ *
+ * \param num_loose_edges: Number of elements to process, used as the dispatch size.
+ */
+void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache,
+                                          GPUIndexBuf *lines_indices,
+                                          uint num_loose_edges)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES_LOOSE, "#define LINES_LOOSE\n");
+  GPU_shader_bind(shader);
+
+  /* Only the index buffer is bound here, nothing is bound at binding point 0. */
+  GPU_indexbuf_bind_as_ssbo(lines_indices, 1);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, num_loose_edges);
+
+  /* This generates an index buffer, so we need to put a barrier on the element array. */
+  GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the edge factor compute shader, reading \a pos_nor and \a edge_idx and writing to
+ * \a edge_fac. The shader is compiled with `GPU_AMD_DRIVER_BYTE_BUG` defined when
+ * #GPU_crappy_amd_driver reports an affected driver.
+ */
+void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache,
+                                       GPUVertBuf *pos_nor,
+                                       GPUVertBuf *edge_idx,
+                                       GPUVertBuf *edge_fac)
+{
+  /* No separate shader for the AMD driver case as we assume that the GPU will not change during
+   * the execution of the program. */
+  const char *defines = GPU_crappy_amd_driver() ? "#define GPU_AMD_DRIVER_BYTE_BUG\n" : nullptr;
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_EDGE_FAC, defines);
+  GPU_shader_bind(shader);
+
+  GPU_vertbuf_bind_as_ssbo(pos_nor, 0);
+  GPU_vertbuf_bind_as_ssbo(edge_idx, 1);
+  GPU_vertbuf_bind_as_ssbo(edge_fac, 2);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the loop normals compute shader, reading \a pos_nor, the extra coarse face data and the
+ * polygon offsets of \a cache, and writing to \a lnor.
+ */
+void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache,
+                                   GPUVertBuf *pos_nor,
+                                   GPUVertBuf *lnor)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LNOR, "#define SUBDIV_POLYGON_OFFSET\n");
+  GPU_shader_bind(shader);
+
+  /* Inputs */
+  GPU_vertbuf_bind_as_ssbo(pos_nor, 1);
+  GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 2);
+  /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+  GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+
+  /* Outputs */
+  GPU_vertbuf_bind_as_ssbo(lnor, 3);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the UV stretch area compute shader, reading \a coarse_data and the polygon offsets of
+ * \a cache, and writing to \a subdiv_data.
+ */
+void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache,
+                                                  GPUVertBuf *coarse_data,
+                                                  GPUVertBuf *subdiv_data)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_AREA,
+                                        "#define SUBDIV_POLYGON_OFFSET\n");
+  GPU_shader_bind(shader);
+
+  /* Inputs */
+  GPU_vertbuf_bind_as_ssbo(coarse_data, 1);
+  /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+  GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+
+  /* Outputs */
+  GPU_vertbuf_bind_as_ssbo(subdiv_data, 2);
+
+  drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/**
+ * Run the UV stretch angle compute shader, reading \a pos_nor and \a uvs and writing to
+ * \a stretch_angles.
+ *
+ * \param uvs_offset: Passed to the shader as the source offset of the UBO.
+ */
+void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache,
+                                                   GPUVertBuf *pos_nor,
+                                                   GPUVertBuf *uvs,
+                                                   int uvs_offset,
+                                                   GPUVertBuf *stretch_angles)
+{
+  GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_ANGLE, nullptr);
+  GPU_shader_bind(shader);
+
+  /* Inputs */
+  GPU_vertbuf_bind_as_ssbo(pos_nor, 0);
+  GPU_vertbuf_bind_as_ssbo(uvs, 1);
+
+  /* Outputs */
+  GPU_vertbuf_bind_as_ssbo(stretch_angles, 2);
+
+  drw_subdiv_compute_dispatch(cache, shader, uvs_offset, 0, cache->num_subdiv_quads);
+
+  /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+  GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+  /* Cleanup. */
+  GPU_shader_unbind();
+}
+
+/* -------------------------------------------------------------------- */
+
+/**
+ * Initialize \a mr from the mesh stored in \a cache.
+ *
+ * The element arrays and counts of `cache->mesh` are always set. The edit-mode members (BMesh,
+ * active elements, custom data offsets, original indices and \a toolsettings) are only set when
+ * `cache->bm` is not null.
+ */
+void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache,
+                                       MeshRenderData *mr,
+                                       const ToolSettings *toolsettings)
+{
+  Mesh *mesh = cache->mesh;
+
+  /* Setup required data for loose geometry. */
+  mr->me = mesh;
+  mr->medge = mesh->medge;
+  mr->mvert = mesh->mvert;
+  mr->mpoly = mesh->mpoly;
+  mr->mloop = mesh->mloop;
+  mr->vert_len = mesh->totvert;
+  mr->edge_len = mesh->totedge;
+  mr->poly_len = mesh->totpoly;
+  mr->loop_len = mesh->totloop;
+  mr->extract_type = MR_EXTRACT_MESH;
+
+  /* MeshRenderData is only used for generating edit mode data here. */
+  if (!cache->bm) {
+    return;
+  }
+
+  BMesh *bm = cache->bm;
+  /* Make sure the element lookup tables are available for vertices, edges and faces. */
+  BM_mesh_elem_table_ensure(bm, BM_EDGE | BM_FACE | BM_VERT);
+
+  mr->bm = bm;
+  mr->toolsettings = toolsettings;
+  mr->eed_act = BM_mesh_active_edge_get(bm);
+  mr->efa_act = BM_mesh_active_face_get(bm, false, true);
+  mr->eve_act = BM_mesh_active_vert_get(bm);
+  mr->crease_ofs = CustomData_get_offset(&bm->edata, CD_CREASE);
+  mr->bweight_ofs = CustomData_get_offset(&bm->edata, CD_BWEIGHT);
+#ifdef WITH_FREESTYLE
+  mr->freestyle_edge_ofs = CustomData_get_offset(&bm->edata, CD_FREESTYLE_EDGE);
+  mr->freestyle_face_ofs = CustomData_get_offset(&bm->pdata, CD_FREESTYLE_FACE);
+#endif
+  /* The original index layers are read from the mesh, not from the BMesh. */
+  mr->v_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX));
+  mr->e_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX));
+  mr->p_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX));
+}
+
+/**
+ * For material assignments we want indices for triangles that share a common material to be laid
+ * out contiguously in memory. To achieve this, we sort the indices based on which material the
+ * coarse polygon was assigned. The sort is performed by offsetting the loops indices so that they
+ * are directly assigned to the right sorted indices.
+ *
+ * \code{.unparsed}
+ * Here is a visual representation, considering four quads:
+ * +---------+---------+---------+---------+
+ * | 3     2 | 7     6 | 11   10 | 15   14 |
+ * |         |         |         |         |
+ * | 0     1 | 4     5 | 8     9 | 12   13 |
+ * +---------+---------+---------+---------+
+ *
+ * If the first and third quads have the same material, we should have:
+ * +---------+---------+---------+---------+
+ * | 3     2 | 11   10 | 7     6 | 15   14 |
+ * |         |         |         |         |
+ * | 0     1 | 8     9 | 4     5 | 12   13 |
+ * +---------+---------+---------+---------+
+ *
+ * So the offsets would be:
+ * +---------+---------+---------+---------+
+ * | 0     0 | 4     4 | -4   -4 | 0     0 |
+ * |         |         |         |         |
+ * | 0     0 | 4     4 | -4   -4 | 0     0 |
+ * +---------+---------+---------+---------+
+ * \endcode
+ *
+ * The offsets are computed not based on the loops indices, but on the number of subdivided
+ * polygons for each coarse polygon. We then only store a single offset for each coarse polygon,
+ * since all sub-faces are contiguous, they all share the same offset.
+ */
+static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache,
+                                                 Mesh *mesh_eval,
+                                                 uint mat_len)
+{
+  /* Any previously computed material data is released first, the arrays assigned to the cache
+   * below are owned by the cache. */
+  draw_subdiv_cache_free_material_data(cache);
+
+  const int number_of_quads = cache->num_subdiv_loops / 4;
+
+  /* With at most one material slot all the quads belong to a single range and no per polygon
+   * offsets are needed. A `mat_len` of zero is handled here as well (this matches the
+   * `material_count <= 1` test of #draw_subdiv_build_tris_buffer), as the code below needs at
+   * least one entry in the material arrays. */
+  if (mat_len <= 1) {
+    cache->mat_start = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_start"));
+    cache->mat_end = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end"));
+    cache->mat_start[0] = 0;
+    cache->mat_end[0] = number_of_quads;
+    return;
+  }
+
+  const int poly_len = mesh_eval->totpoly;
+  const int mat_last = static_cast<int>(mat_len) - 1;
+  int *subdiv_polygon_offset = cache->subdiv_polygon_offset;
+
+  /* Nothing here guarantees that the material index stored on a polygon is smaller than
+   * `mat_len`, so clamp it to the valid range to never index outside of the material arrays. */
+  const auto material_index_get = [&](const int poly_index) -> int {
+    const int mat_nr = mesh_eval->mpoly[poly_index].mat_nr;
+    if (mat_nr < 0) {
+      return 0;
+    }
+    return (mat_nr > mat_last) ? mat_last : mat_nr;
+  };
+
+  /* Number of subdivided quads generated for a coarse polygon: the distance to the offset of the
+   * next polygon, or to the total number of quads for the last polygon. */
+  const auto quad_count_get = [&](const int poly_index) -> int {
+    const int next_offset = (poly_index == poly_len - 1) ? number_of_quads :
+                                                           subdiv_polygon_offset[poly_index + 1];
+    return next_offset - subdiv_polygon_offset[poly_index];
+  };
+
+  /* Count number of subdivided polygons for each material. */
+  int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start"));
+
+  // TODO: parallel_reduce?
+  for (int i = 0; i < poly_len; i++) {
+    mat_start[material_index_get(i)] += quad_count_get(i);
+  }
+
+  /* Accumulate offsets: turn the per material counts into the index of the first quad of each
+   * material (exclusive prefix sum). */
+  int ofs = mat_start[0];
+  mat_start[0] = 0;
+  for (uint i = 1; i < mat_len; i++) {
+    int tmp = mat_start[i];
+    mat_start[i] = ofs;
+    ofs += tmp;
+  }
+
+  /* Compute per polygon offsets. `mat_end` starts as a copy of `mat_start` and is advanced each
+   * time a polygon is assigned to a material, so that it ends up one past the last quad of each
+   * material. */
+  int *mat_end = static_cast<int *>(MEM_dupallocN(mat_start));
+  int *per_polygon_mat_offset = static_cast<int *>(
+      MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset"));
+
+  for (int i = 0; i < poly_len; i++) {
+    const int mat_index = material_index_get(i);
+    const int single_material_index = subdiv_polygon_offset[i];
+    const int material_offset = mat_end[mat_index];
+    mat_end[mat_index] += quad_count_get(i);
+
+    /* Offset to go from the unsorted position of the quads to their sorted position. */
+    per_polygon_mat_offset[i] = material_offset - single_material_index;
+  }
+
+  cache->polygon_mat_offset = draw_subdiv_build_origindex_buffer(per_polygon_mat_offset,
+                                                                 mesh_eval->totpoly);
+  cache->mat_start = mat_start;
+  cache->mat_end = mat_end;
+
+  /* The offsets were passed to the buffer creation above, the CPU side copy is released. */
+  MEM_freeN(per_polygon_mat_offset);
+}
+
+/**
+ * Set up the subdivision descriptor, evaluator and draw cache for \a ob, then create the buffers
+ * requested in \a mbc.
+ *
+ * \return false when nothing was created: the subdivision level is 0, no subdivision descriptor
+ * could be obtained, the evaluation could not begin, or the draw cache could not be built.
+ */
+static bool draw_subdiv_create_requested_buffers(const Scene *scene,
+                                                 Object *ob,
+                                                 Mesh *mesh,
+                                                 struct MeshBatchCache *batch_cache,
+                                                 MeshBufferCache *mbc,
+                                                 const ToolSettings *toolsettings,
+                                                 OpenSubdiv_EvaluatorCache *evaluator_cache)
+{
+  SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob);
+  BLI_assert(smd);
+
+  const bool is_final_render = DRW_state_is_scene_render();
+
+  SubdivSettings settings;
+  BKE_subsurf_modifier_subdiv_settings_init(&settings, smd, is_final_render);
+
+  if (settings.level == 0) {
+    return false;
+  }
+
+  /* In edit mode, subdivide the final evaluated edit mesh and keep the BMesh around. */
+  Mesh *mesh_eval = mesh;
+  BMesh *bm = nullptr;
+  if (mesh->edit_mesh) {
+    mesh_eval = mesh->edit_mesh->mesh_eval_final;
+    bm = mesh->edit_mesh->bm;
+  }
+
+  BKE_subsurf_modifier_ensure_runtime(smd);
+
+  Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &settings, mesh_eval, true);
+  if (!subdiv) {
+    return false;
+  }
+
+  if (!BKE_subdiv_eval_begin_from_mesh(
+          subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) {
+    return false;
+  }
+
+  DRWSubdivCache *draw_cache = mesh_batch_cache_ensure_subdiv_cache(batch_cache);
+  if (!draw_subdiv_build_cache(draw_cache, subdiv, mesh_eval, scene, smd, is_final_render)) {
+    return false;
+  }
+
+  const bool optimal_display = (smd->flags & eSubsurfModifierFlag_ControlEdges);
+
+  /* Store on the cache what the extraction functions need later on. */
+  draw_cache->bm = bm;
+  draw_cache->mesh = mesh_eval;
+  draw_cache->subdiv = subdiv;
+  draw_cache->optimal_display = optimal_display;
+  draw_cache->num_subdiv_triangles = tris_count_from_number_of_loops(draw_cache->num_subdiv_loops);
+  /* We can only evaluate limit normals if the patches are adaptive. */
+  draw_cache->do_limit_normals = settings.is_adaptive;
+
+  /* The material offsets are only computed when the triangle index buffer is requested. */
+  if (DRW_ibo_requested(mbc->buff.ibo.tris)) {
+    draw_subdiv_cache_ensure_mat_offsets(draw_cache, mesh_eval, batch_cache->mat_len);
+  }
+
+  draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval);
+
+  mesh_buffer_cache_create_requested_subdiv(batch_cache, mbc, draw_cache, toolsettings);
+
+  return true;
+}
+
+/* Evaluator cache shared by every call to #DRW_create_subdivision. It is created on first use
+ * and deleted by #DRW_subdiv_free. */
+static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr;
+
+/**
+ * Entry point to create the requested subdivision buffers of \a mbc, creating the global
+ * evaluator cache if it does not exist yet. Failure to create the buffers is not reported to the
+ * caller.
+ */
+void DRW_create_subdivision(const Scene *scene,
+                            Object *ob,
+                            Mesh *mesh,
+                            struct MeshBatchCache *batch_cache,
+                            MeshBufferCache *mbc,
+                            const ToolSettings *toolsettings)
+{
+  if (g_evaluator_cache == nullptr) {
+    g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE);
+  }
+
+/* The timing code below is disabled as `TIME_SUBDIV` is explicitly undefined here, replace the
+ * `#undef` by a `#define` to print timings to `stderr`. */
+#undef TIME_SUBDIV
+
+#ifdef TIME_SUBDIV
+  const double begin_time = PIL_check_seconds_timer();
+#endif
+
+  /* Returning early only matters for the timing code: nothing is printed on failure. */
+  if (!draw_subdiv_create_requested_buffers(
+          scene, ob, mesh, batch_cache, mbc, toolsettings, g_evaluator_cache)) {
+    return;
+  }
+
+#ifdef TIME_SUBDIV
+  const double end_time = PIL_check_seconds_timer();
+  fprintf(stderr, "Time to update subdivision: %f\n", end_time - begin_time);
+  fprintf(stderr, "Maximum FPS: %f\n", 1.0 / (end_time - begin_time));
+#endif
+}
+
+/**
+ * Free the global data of the GPU subdivision code: the compute shaders, the subdivision
+ * descriptors queued for deletion, and the evaluator cache.
+ *
+ * Every freed global is reset to null, so that no dangling pointer is left behind and calling
+ * this function more than once is harmless.
+ */
+void DRW_subdiv_free()
+{
+  for (int i = 0; i < NUM_SHADERS; ++i) {
+    if (g_subdiv_shaders[i] == nullptr) {
+      /* Nothing stored in this slot. */
+      continue;
+    }
+    GPU_shader_free(g_subdiv_shaders[i]);
+    /* Clear the slot, as is done for the evaluator cache below. */
+    g_subdiv_shaders[i] = nullptr;
+  }
+
+  DRW_cache_free_old_subdiv();
+
+  if (g_evaluator_cache) {
+    openSubdiv_deleteEvaluatorCache(g_evaluator_cache);
+    g_evaluator_cache = nullptr;
+  }
+}
+
+/* Subdivision descriptors waiting to be freed by #DRW_cache_free_old_subdiv. The list is
+ * protected by `gpu_subdiv_queue_mutex`. */
+static LinkNode *gpu_subdiv_free_queue = nullptr;
+static ThreadMutex gpu_subdiv_queue_mutex = BLI_MUTEX_INITIALIZER;
+
+/**
+ * Queue \a subdiv for deletion. Nothing is freed here: the descriptor is only added to the free
+ * queue, under the queue mutex.
+ */
+void DRW_subdiv_cache_free(Subdiv *subdiv)
+{
+  BLI_mutex_lock(&gpu_subdiv_queue_mutex);
+  BLI_linklist_prepend(&gpu_subdiv_free_queue, subdiv);
+  BLI_mutex_unlock(&gpu_subdiv_queue_mutex);
+}
+
+/**
+ * Free every subdivision descriptor which was queued through #DRW_subdiv_cache_free, leaving the
+ * queue empty.
+ */
+void DRW_cache_free_old_subdiv()
+{
+  /* NOTE(review): this emptiness test reads the queue without holding the mutex, presumably as a
+   * cheap early out for the common case. Confirm that this unsynchronized read is acceptable. */
+  if (gpu_subdiv_free_queue == nullptr) {
+    return;
+  }
+
+  BLI_mutex_lock(&gpu_subdiv_queue_mutex);
+
+  while (gpu_subdiv_free_queue != nullptr) {
+    Subdiv *subdiv = static_cast<Subdiv *>(BLI_linklist_pop(&gpu_subdiv_free_queue));
+    /* Set the type to CPU so that we do actually free the cache. */
+    subdiv->evaluator->type = OPENSUBDIV_EVALUATOR_CPU;
+    BKE_subdiv_free(subdiv);
+  }
+
+  BLI_mutex_unlock(&gpu_subdiv_queue_mutex);
+}
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 930fb6eabef..0bf6468f7cc 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -52,6 +52,7 @@
 #include "BKE_pointcache.h"
 #include "BKE_pointcloud.h"
 #include "BKE_screen.h"
+#include "BKE_subdiv_modifier.h"
 #include "BKE_volume.h"
 
 #include "DNA_camera_types.h"
@@ -90,6 +91,7 @@
 #include "draw_manager_testing.h"
 #include "draw_manager_text.h"
 #include "draw_shader.h"
+#include "draw_subdivision.h"
 #include "draw_texture_pool.h"
 
 /* only for callbacks */
@@ -2975,6 +2977,8 @@ void DRW_engines_register(void)
 
     BKE_volume_batch_cache_dirty_tag_cb = DRW_volume_batch_cache_dirty_tag;
     BKE_volume_batch_cache_free_cb = DRW_volume_batch_cache_free;
+
+    BKE_subsurf_modifier_free_gpu_cache_cb = DRW_subdiv_cache_free;
   }
 }
 
diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h
new file mode 100644
index 00000000000..f60ec7afc77
--- /dev/null
+++ b/source/blender/draw/intern/draw_subdivision.h
@@ -0,0 +1,231 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2021, Blender Foundation.
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "BLI_sys_types.h"
+
+struct BMesh;
+struct GPUIndexBuf;
+struct GPUUniformBuf;
+struct GPUVertBuf;
+struct Mesh;
+struct MeshBatchCache;
+struct MeshBufferCache;
+struct MeshRenderData;
+struct Object;
+struct Scene;
+struct Subdiv;
+struct ToolSettings;
+
+/* -------------------------------------------------------------------- */
+/** \name DRWPatchMap
+ *
+ * This is a GPU version of the OpenSubDiv PatchMap. The quad tree and the patch handles are copied
+ * to GPU buffers in order to lookup the right patch for a given set of patch coordinates.
+ * \{ */
+
+typedef struct DRWPatchMap {
+  struct GPUVertBuf *patch_map_handles;  /* GPU copy of the patch handles. */
+  struct GPUVertBuf *patch_map_quadtree; /* GPU copy of the patch map quad tree. */
+  int min_patch_face; /* NOTE(review): presumably mirrors the OpenSubDiv PatchMap - confirm. */
+  int max_patch_face; /* NOTE(review): same assumption as `min_patch_face`. */
+  int max_depth;      /* NOTE(review): presumably the depth of the quad tree - confirm. */
+  int patches_are_triangular; /* NOTE(review): looks like a boolean stored as int - confirm. */
+} DRWPatchMap;
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivCache
+ *
+ * This holds the various buffers used to evaluate and render subdivision through OpenGL.
+ * \{ */
+
+typedef struct DRWSubdivCache {
+  struct Mesh *mesh; /* Read as the coarse mesh by the extractors. */
+  struct BMesh *bm;
+  struct Subdiv *subdiv;
+  bool optimal_display;
+  bool do_limit_normals;
+
+  /* Coordinates used to evaluate patches for UVs, positions, and normals. */
+  struct GPUVertBuf *patch_coords;
+  /* Coordinates used to evaluate patches for the face centers (or face dots) in edit-mode. */
+  struct GPUVertBuf *fdots_patch_coords;
+
+  /* Resolution used to generate the patch coordinates. */
+  int resolution;
+
+  /* Number of subdivided loops, also the number of patch coordinates since we have one coordinate
+   * per quad corner/vertex. */
+  uint num_subdiv_loops;
+  uint num_subdiv_edges;
+  uint num_subdiv_triangles;
+  uint num_subdiv_verts;
+  uint num_subdiv_quads;
+
+  /* Number of polygons in the coarse mesh, notably used to compute a coarse polygon index given a
+   * subdivision loop index. */
+  int num_coarse_poly;
+
+  /* Maps subdivision loop to subdivided vertex index. */
+  int *subdiv_loop_subdiv_vert_index;
+  /* Maps subdivision loop to original coarse poly index. */
+  int *subdiv_loop_poly_index;
+
+  /* Indices of faces adjacent to the vertices, ordered by vertex index, with no particular
+   * winding. */
+  struct GPUVertBuf *subdiv_vertex_face_adjacency;
+  /* The difference between value (i + 1) and (i) gives the number of faces adjacent to vertex (i).
+   */
+  struct GPUVertBuf *subdiv_vertex_face_adjacency_offsets;
+
+  /* Maps subdivision loop to original coarse vertex index, only really useful for edit mode. */
+  struct GPUVertBuf *verts_orig_index;
+  /* Maps subdivision loop to original coarse edge index, only really useful for edit mode. */
+  struct GPUVertBuf *edges_orig_index;
+
+  /* Owned by #Subdiv. Indexed by coarse polygon index, difference between value (i + 1) and (i)
+   * gives the number of ptex faces for coarse polygon (i). */
+  int *face_ptex_offset;
+  /* Vertex buffer for face_ptex_offset. */
+  struct GPUVertBuf *face_ptex_offset_buffer;
+
+  int *subdiv_polygon_offset; /* NOTE(review): undocumented, semantics to be confirmed. */
+  struct GPUVertBuf *subdiv_polygon_offset_buffer; /* NOTE(review): presumably its GPU copy. */
+
+  /* Contains the start loop index and the smooth flag for each coarse polygon. */
+  struct GPUVertBuf *extra_coarse_face_data;
+
+  /* Computed for ibo.points, one value per subdivided vertex, mapping coarse vertices ->
+   * subdivided loop. */
+  int *point_indices;
+
+  /* Material offsets. */
+  int *mat_start;
+  int *mat_end;
+  struct GPUVertBuf *polygon_mat_offset;
+
+  DRWPatchMap gpu_patch_map;
+
+  /* UBO to store settings for the various compute shaders. */
+  struct GPUUniformBuf *ubo;
+} DRWSubdivCache;
+
+/* Only frees the data of the cache, caller is responsible to free the cache itself if necessary.
+ */
+void draw_subdiv_cache_free(DRWSubdivCache *cache);
+
+/** \} */
+
+void DRW_create_subdivision(const struct Scene *scene,
+                            struct Object *ob,
+                            struct Mesh *mesh,
+                            struct MeshBatchCache *batch_cache,
+                            struct MeshBufferCache *mbc,
+                            const struct ToolSettings *toolsettings);
+
+void DRW_subdiv_cache_free(struct Subdiv *subdiv);
+
+void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache,
+                                       struct MeshRenderData *mr,
+                                       const struct ToolSettings *toolsettings);
+
+void draw_subdiv_init_origindex_buffer(struct GPUVertBuf *buffer,
+                                       int *vert_origindex,
+                                       uint num_loops,
+                                       uint loose_len);
+
+struct GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops);
+
+/* Compute shader functions. */
+
+void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache,
+                                          struct GPUVertBuf *mask_vbo,
+                                          struct GPUVertBuf *face_set_vbo,
+                                          struct GPUVertBuf *sculpt_data);
+
+void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache,
+                                    struct GPUVertBuf *pos_nor,
+                                    struct GPUVertBuf *face_adjacency_offsets,
+                                    struct GPUVertBuf *face_adjacency_lists,
+                                    struct GPUVertBuf *vertex_normals);
+
+void draw_subdiv_finalize_normals(const DRWSubdivCache *cache,
+                                  struct GPUVertBuf *vertex_normals,
+                                  struct GPUVertBuf *subdiv_loop_subdiv_vert_index,
+                                  struct GPUVertBuf *pos_nor);
+
+void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache,
+                                 struct GPUVertBuf *pos_nor,
+                                 const bool do_limit_normals);
+
+void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache,
+                                    struct GPUVertBuf *src_data,
+                                    struct GPUVertBuf *dst_buffer,
+                                    int dimensions,
+                                    int dst_offset);
+
+void draw_subdiv_extract_uvs(const DRWSubdivCache *cache,
+                             struct GPUVertBuf *uvs,
+                             const int face_varying_channel,
+                             const int dst_offset);
+
+void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache,
+                                       struct GPUVertBuf *pos_nor,
+                                       struct GPUVertBuf *edge_idx,
+                                       struct GPUVertBuf *edge_fac);
+
+void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache,
+                                   struct GPUIndexBuf *subdiv_tris,
+                                   const int material_count);
+
+void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache,
+                                    struct GPUIndexBuf *lines_indices);
+
+void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache,
+                                          struct GPUIndexBuf *lines_indices,
+                                          uint num_loose_edges);
+
+void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache,
+                                     struct GPUVertBuf *fdots_pos,
+                                     struct GPUVertBuf *fdots_nor,
+                                     struct GPUIndexBuf *fdots_indices);
+
+void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache,
+                                   struct GPUVertBuf *pos_nor,
+                                   struct GPUVertBuf *lnor);
+
+void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache,
+                                                  struct GPUVertBuf *coarse_data,
+                                                  struct GPUVertBuf *subdiv_data);
+
+void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache,
+                                                   struct GPUVertBuf *pos_nor,
+                                                   struct GPUVertBuf *uvs,
+                                                   int uvs_offset,
+                                                   struct GPUVertBuf *stretch_angles);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.h b/source/blender/draw/intern/mesh_extractors/extract_mesh.h
index 7d21804c08f..35cc2cf986e 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh.h
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.h
@@ -39,6 +39,8 @@
 extern "C" {
 #endif
 
+struct DRWSubdivCache;
+
 #define MIN_RANGE_LEN 1024
 
 /* ---------------------------------------------------------------------- */
@@ -203,6 +205,11 @@ typedef void(ExtractLVertMeshFn)(const MeshRenderData *mr,
                                  const MVert *mv,
                                  const int lvert_index,
                                  void *data);
+typedef void(ExtractLooseGeomSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+                                       const MeshRenderData *mr,
+                                       const MeshExtractLooseGeom *loose_geom,
+                                       void *buffer,
+                                       void *data);
 typedef void(ExtractInitFn)(const MeshRenderData *mr,
                             struct MeshBatchCache *cache,
                             void *buffer,
@@ -213,6 +220,18 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr,
                               void *data);
 typedef void(ExtractTaskReduceFn)(void *userdata, void *task_userdata);
 
+typedef void(ExtractInitSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+                                  const MeshRenderData *mr,
+                                  struct MeshBatchCache *cache,
+                                  void *buf,
+                                  void *data);
+typedef void(ExtractIterSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+                                  const MeshRenderData *mr,
+                                  void *data);
+typedef void(ExtractFinishSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+                                    void *buf,
+                                    void *data);
+
 typedef struct MeshExtract {
   /** Executed on main thread and return user data for iteration functions. */
   ExtractInitFn *init;
@@ -225,9 +244,14 @@ typedef struct MeshExtract {
   ExtractLEdgeMeshFn *iter_ledge_mesh;
   ExtractLVertBMeshFn *iter_lvert_bm;
   ExtractLVertMeshFn *iter_lvert_mesh;
+  ExtractLooseGeomSubdivFn *iter_loose_geom_subdiv;
   /** Executed on one worker thread after all elements iterations. */
   ExtractTaskReduceFn *task_reduce;
   ExtractFinishFn *finish;
+  /** Executed on main thread for subdivision evaluation. */
+  ExtractInitSubdivFn *init_subdiv;
+  ExtractIterSubdivFn *iter_subdiv;
+  ExtractFinishSubdivFn *finish_subdiv;
   /** Used to request common data. */
   eMRDataType data_type;
   size_t data_size;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
index 4cc9a875f79..6a1691e8634 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
@@ -27,6 +27,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 /* ---------------------------------------------------------------------- */
 /** \name Extract Edit UV Triangles Indices
@@ -94,6 +96,57 @@ static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr),
   GPU_indexbuf_build_in_place(&data->elb, ibo);
 }
 
+static void extract_edituv_tris_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *mr,
+                                            MeshBatchCache *UNUSED(cache),
+                                            void *UNUSED(buf),
+                                            void *tls_data)
+{
+  /* Prepare the triangle index builder, with the subdivided loops as the indexed vertices, and
+   * record whether the UV_SYNC_SELECTION tool setting is enabled. */
+  const uint tri_len = subdiv_cache->num_subdiv_triangles;
+  MeshExtract_EditUvElem_Data *uv_data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+  GPU_indexbuf_init(&uv_data->elb, GPU_PRIM_TRIS, tri_len, subdiv_cache->num_subdiv_loops);
+  uv_data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_tris_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *mr,
+                                            void *_data)
+{
+  /* Every subdivided quad owns four consecutive loops and is emitted as two triangles,
+   * (0, 1, 2) and (0, 2, 3), indexed by loop. Both triangles take their hidden and selected
+   * state from the original face looked up through the quad's first loop.
+   *
+   * NOTE(review): `efa` is used without a null check; this assumes that
+   * #bm_original_face_get always returns a face for these loops - TODO confirm with the
+   * callers of this extractor. */
+  MeshExtract_EditUvElem_Data *uv_data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+  const int *loop_to_coarse_poly = subdiv_cache->subdiv_loop_poly_index;
+  const uint quad_len = subdiv_cache->num_subdiv_quads;
+
+  for (uint quad = 0; quad < quad_len; quad++) {
+    const uint corner = quad * 4;
+    BMFace *efa = bm_original_face_get(mr, loop_to_coarse_poly[corner]);
+    const bool is_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0;
+    const bool is_selected = BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0;
+
+    /* First half of the quad. */
+    edituv_tri_add(uv_data, is_hidden, is_selected, corner, corner + 1, corner + 2);
+    /* Second half, sharing the diagonal between corners 0 and 2. */
+    edituv_tri_add(uv_data, is_hidden, is_selected, corner, corner + 2, corner + 3);
+  }
+}
+
+static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+                                              void *buf,
+                                              void *_data)
+{
+  /* Build the final triangle index buffer in place from what the builder accumulated. */
+  GPUIndexBufBuilder *builder = &static_cast<MeshExtract_EditUvElem_Data *>(_data)->elb;
+  GPU_indexbuf_build_in_place(builder, static_cast<GPUIndexBuf *>(buf));
+}
+
 constexpr MeshExtract create_extractor_edituv_tris()
 {
   MeshExtract extractor = {nullptr};
@@ -101,6 +154,9 @@ constexpr MeshExtract create_extractor_edituv_tris()
   extractor.iter_looptri_bm = extract_edituv_tris_iter_looptri_bm;
   extractor.iter_looptri_mesh = extract_edituv_tris_iter_looptri_mesh;
   extractor.finish = extract_edituv_tris_finish;
+  extractor.init_subdiv = extract_edituv_tris_init_subdiv;
+  extractor.iter_subdiv = extract_edituv_tris_iter_subdiv;
+  extractor.finish_subdiv = extract_edituv_tris_finish_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
   extractor.use_threading = false;
@@ -184,6 +240,56 @@ static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr),
   GPU_indexbuf_build_in_place(&data->elb, ibo);
 }
 
+static void extract_edituv_lines_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                             const MeshRenderData *mr,
+                                             MeshBatchCache *UNUSED(cache),
+                                             void *UNUSED(buf),
+                                             void *tls_data)
+{
+  const uint loop_len = subdiv_cache->num_subdiv_loops; /* Both line and vertex capacity. */
+  MeshExtract_EditUvElem_Data *uv_data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+  GPU_indexbuf_init(&uv_data->elb, GPU_PRIM_LINES, loop_len, loop_len);
+  uv_data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_lines_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                             const MeshRenderData *mr,
+                                             void *_data)
+{
+  /* Add one UV line per loop of each subdivided quad (four consecutive loops, the last one
+   * closing back onto the first), hidden if the face is hidden or the edge is not real. */
+  MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+  int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+  int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+
+  for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) {
+    const uint start_loop_idx = i * 4;
+    const uint end_loop_idx = (i + 1) * 4;
+    BMFace *efa = bm_original_face_get(mr, subdiv_loop_poly_index[start_loop_idx]);
+    const bool hidden = BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) != 0;
+    const bool selected = BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) != 0;
+
+    for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) {
+      /* Guard `mr->e_origindex` before indexing it, like the points extractor does for
+       * `mr->v_origindex`; without a mapping, any loop with a coarse edge counts as real. */
+      const int edge_origindex = subdiv_loop_edge_index[loop_idx];
+      const bool is_real = (edge_origindex != -1) &&
+                           (!mr->e_origindex || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE);
+      const uint next_idx = (loop_idx + 1 == end_loop_idx) ? start_loop_idx : (loop_idx + 1);
+      edituv_edge_add(data, hidden || !is_real, selected, loop_idx, next_idx);
+    }
+  }
+}
+
+static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+                                               void *buf,
+                                               void *_data)
+{
+  /* Build the final line index buffer in place from what the builder accumulated. */
+  GPUIndexBufBuilder *builder = &static_cast<MeshExtract_EditUvElem_Data *>(_data)->elb;
+  GPU_indexbuf_build_in_place(builder, static_cast<GPUIndexBuf *>(buf));
+}
+
 constexpr MeshExtract create_extractor_edituv_lines()
 {
   MeshExtract extractor = {nullptr};
@@ -191,6 +297,9 @@ constexpr MeshExtract create_extractor_edituv_lines()
   extractor.iter_poly_bm = extract_edituv_lines_iter_poly_bm;
   extractor.iter_poly_mesh = extract_edituv_lines_iter_poly_mesh;
   extractor.finish = extract_edituv_lines_finish;
+  extractor.init_subdiv = extract_edituv_lines_init_subdiv;
+  extractor.iter_subdiv = extract_edituv_lines_iter_subdiv;
+  extractor.finish_subdiv = extract_edituv_lines_finish_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
   extractor.use_threading = false;
@@ -268,6 +377,50 @@ static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr),
   GPU_indexbuf_build_in_place(&data->elb, ibo);
 }
 
+static void extract_edituv_points_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                              const MeshRenderData *mr,
+                                              MeshBatchCache *UNUSED(cache),
+                                              void *UNUSED(buf),
+                                              void *tls_data)
+{
+  const uint loop_len = subdiv_cache->num_subdiv_loops; /* Both point and vertex capacity. */
+  MeshExtract_EditUvElem_Data *uv_data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+  GPU_indexbuf_init(&uv_data->elb, GPU_PRIM_POINTS, loop_len, loop_len);
+  uv_data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_points_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                              const MeshRenderData *mr,
+                                              void *_data)
+{
+  /* Add one UV point per subdivided loop. A point is hidden when its original face is hidden
+   * or when the loop does not map, through `mr->v_origindex`, to an original vertex. */
+  MeshExtract_EditUvElem_Data *uv_data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+  const int *loop_to_coarse_vert = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+  const int *loop_to_coarse_poly = subdiv_cache->subdiv_loop_poly_index;
+  /* The origindex lookup is only attempted for mapped extraction with a mapping present. */
+  const bool has_mapping = (mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex != nullptr);
+
+  for (uint loop = 0; loop < subdiv_cache->num_subdiv_loops; loop++) {
+    const int coarse_vert = loop_to_coarse_vert[loop];
+    BMFace *efa = bm_original_face_get(mr, loop_to_coarse_poly[loop]);
+    const bool real_vert = has_mapping && coarse_vert != -1 &&
+                           mr->v_origindex[coarse_vert] != ORIGINDEX_NONE;
+    const bool hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN) || !real_vert;
+
+    edituv_point_add(uv_data, hidden, BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0, loop);
+  }
+}
+
+static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+                                                void *buf,
+                                                void *_data)
+{
+  /* Build the final point index buffer in place from what the builder accumulated. */
+  GPUIndexBufBuilder *builder = &static_cast<MeshExtract_EditUvElem_Data *>(_data)->elb;
+  GPU_indexbuf_build_in_place(builder, static_cast<GPUIndexBuf *>(buf));
+}
+
 constexpr MeshExtract create_extractor_edituv_points()
 {
   MeshExtract extractor = {nullptr};
@@ -275,6 +428,9 @@ constexpr MeshExtract create_extractor_edituv_points()
   extractor.iter_poly_bm = extract_edituv_points_iter_poly_bm;
   extractor.iter_poly_mesh = extract_edituv_points_iter_poly_mesh;
   extractor.finish = extract_edituv_points_finish;
+  extractor.init_subdiv = extract_edituv_points_init_subdiv;
+  extractor.iter_subdiv = extract_edituv_points_iter_subdiv;
+  extractor.finish_subdiv = extract_edituv_points_finish_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
index 54f5611106f..3d9729dea56 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
@@ -25,6 +25,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -155,6 +157,33 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr),
   GPU_indexbuf_build_in_place(elb, ibo);
 }
 
+static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                      const MeshRenderData *mr,
+                                      struct MeshBatchCache *UNUSED(cache),
+                                      void *buffer,
+                                      void *UNUSED(data))
+{
+  /* Allocate two indices per subdivided loop and per loose edge, then build the lines. */
+  const uint index_len = subdiv_cache->num_subdiv_loops * 2 + mr->edge_loose_len * 2;
+  GPUIndexBuf *lines_ibo = static_cast<GPUIndexBuf *>(buffer);
+  GPU_indexbuf_init_build_on_device(lines_ibo, index_len);
+  draw_subdiv_build_lines_buffer(subdiv_cache, lines_ibo);
+}
+
+static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *UNUSED(mr),
+                                            const MeshExtractLooseGeom *loose_geom,
+                                            void *buffer,
+                                            void *UNUSED(data))
+{
+  /* Only request the loose lines build when there is at least one loose edge. */
+  const uint loose_edge_len = static_cast<uint>(loose_geom->edge_len);
+  if (loose_edge_len != 0) {
+    GPUIndexBuf *lines_ibo = static_cast<GPUIndexBuf *>(buffer);
+    draw_subdiv_build_lines_loose_buffer(subdiv_cache, lines_ibo, loose_edge_len);
+  }
+}
+
 constexpr MeshExtract create_extractor_lines()
 {
   MeshExtract extractor = {nullptr};
@@ -163,6 +192,8 @@ constexpr MeshExtract create_extractor_lines()
   extractor.iter_poly_mesh = extract_lines_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_lines_iter_ledge_bm;
   extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh;
+  extractor.init_subdiv = extract_lines_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_lines_loose_geom_subdiv;
   extractor.task_reduce = extract_lines_task_reduce;
   extractor.finish = extract_lines_finish;
   extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
index e7dabfa9ee2..6855feb51ed 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
@@ -26,6 +26,7 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -44,6 +45,18 @@ struct MeshExtract_LineAdjacency_Data {
   uint *vert_to_loop;
 };
 
+static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data,
+                                     uint vert_len,
+                                     uint loop_len,
+                                     uint tess_edge_len)
+{
+  /* `vert_to_loop` starts zeroed; the builder and edge hash are sized by `tess_edge_len`. */
+  data->is_manifold = true;
+  data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * vert_len, __func__));
+  data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len);
+  GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, loop_len);
+}
+
 static void extract_lines_adjacency_init(const MeshRenderData *mr,
                                          struct MeshBatchCache *UNUSED(cache),
                                          void *UNUSED(buf),
@@ -55,11 +68,7 @@ static void extract_lines_adjacency_init(const MeshRenderData *mr,
   uint tess_edge_len = mr->loop_len + mr->tri_len - mr->poly_len;
 
   MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(tls_data);
-  data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * mr->vert_len, __func__));
-
-  GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, mr->loop_len);
-  data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len);
-  data->is_manifold = true;
+  line_adjacency_data_init(data, mr->vert_len, mr->loop_len, tess_edge_len);
 }
 
 BLI_INLINE void lines_adjacency_triangle(
@@ -171,6 +180,56 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr),
   MEM_freeN(data->vert_to_loop);
 }
 
+static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                                const MeshRenderData *UNUSED(mr),
+                                                struct MeshBatchCache *UNUSED(cache),
+                                                void *UNUSED(buf),
+                                                void *_data)
+{
+  MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+  /* For each polygon there is (loop + triangle - 1) edges. Since we only have quads, and a quad
+   * is split into 2 triangles, we have (loop + 2 - 1) = (loop + 1) edges for each quad, or in
+   * total: (number_of_loops + number_of_quads). */
+  const uint tess_len = subdiv_cache->num_subdiv_loops + subdiv_cache->num_subdiv_quads;
+  /* The helper expects (vert_len, loop_len, tess_edge_len), in that order. */
+  line_adjacency_data_init(
+      data, subdiv_cache->num_subdiv_verts, subdiv_cache->num_subdiv_loops, tess_len);
+}
+
+static void extract_lines_adjacency_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                                const MeshRenderData *UNUSED(mr),
+                                                void *_data)
+{
+  /* Feed both triangles of every subdivided quad, (0, 1, 2) and (0, 2, 3), to the adjacency
+   * builder. Each corner is passed as a subdivided vertex index and as a loop index. */
+  MeshExtract_LineAdjacency_Data *adj = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+  const int *loop_to_vert = subdiv_cache->subdiv_loop_subdiv_vert_index;
+
+  for (uint quad = 0; quad < subdiv_cache->num_subdiv_quads; quad++) {
+    /* A quad owns four consecutive loops; look up the subdivided vertex of each of them. */
+    const uint loops[4] = {quad * 4, quad * 4 + 1, quad * 4 + 2, quad * 4 + 3};
+    uint verts[4];
+    for (int corner = 0; corner < 4; corner++) {
+      verts[corner] = loop_to_vert[loops[corner]];
+    }
+
+    lines_adjacency_triangle(verts[0], verts[1], verts[2], loops[0], loops[1], loops[2], adj);
+    lines_adjacency_triangle(verts[0], verts[2], verts[3], loops[0], loops[2], loops[3], adj);
+  }
+}
+
+static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache),
+                                                  void *buf,
+                                                  void *_data)
+{
+  /* Build the index buffer in place, then release the edge hash and the vertex to loop map. */
+  MeshExtract_LineAdjacency_Data *adj = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+  GPU_indexbuf_build_in_place(&adj->elb, static_cast<GPUIndexBuf *>(buf));
+  BLI_edgehash_free(adj->eh, nullptr);
+  MEM_freeN(adj->vert_to_loop);
+}
+
 #undef NO_EDGE
 
 constexpr MeshExtract create_extractor_lines_adjacency()
@@ -180,6 +239,9 @@ constexpr MeshExtract create_extractor_lines_adjacency()
   extractor.iter_looptri_bm = extract_lines_adjacency_iter_looptri_bm;
   extractor.iter_looptri_mesh = extract_lines_adjacency_iter_looptri_mesh;
   extractor.finish = extract_lines_adjacency_finish;
+  extractor.init_subdiv = extract_lines_adjacency_init_subdiv;
+  extractor.iter_subdiv = extract_lines_adjacency_iter_subdiv;
+  extractor.finish_subdiv = extract_lines_adjacency_finish_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_LineAdjacency_Data);
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
index 01e14a004ed..19167772a42 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
@@ -25,6 +25,7 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -155,6 +156,74 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr),
   GPU_indexbuf_build_in_place(elb, ibo);
 }
 
+static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                       const MeshRenderData *UNUSED(mr),
+                                       struct MeshBatchCache *UNUSED(cache),
+                                       void *UNUSED(buffer),
+                                       void *data)
+{
+  GPUIndexBufBuilder *builder = static_cast<GPUIndexBufBuilder *>(data);
+  builder->prim_type = GPU_PRIM_POINTS;
+  builder->index_min = 0;
+  builder->index_max = subdiv_cache->num_subdiv_loops - 1;
+  builder->index_len = subdiv_cache->num_subdiv_verts;
+  /* Work on a duplicate of the cached point indices, as the data upload will free them. */
+  builder->data = (uint *)MEM_dupallocN(subdiv_cache->point_indices);
+}
+
+static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                             const MeshRenderData *UNUSED(mr),
+                                             const MeshExtractLooseGeom *loose_geom,
+                                             void *UNUSED(buffer),
+                                             void *data)
+{
+  const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len;
+  if (loop_loose_len == 0) {
+    return;
+  }
+
+  GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data);
+  /* Grow the index array so that it holds `num_subdiv_loops + loop_loose_len` entries. */
+  elb->data = static_cast<uint32_t *>(
+      MEM_reallocN(elb->data, sizeof(uint) * (subdiv_cache->num_subdiv_loops + loop_loose_len)));
+
+  const Mesh *coarse_mesh = subdiv_cache->mesh;
+  const MEdge *coarse_edges = coarse_mesh->medge;
+  /* Offsets handed out to loose geometry start right after the subdivision loops. */
+  uint offset = subdiv_cache->num_subdiv_loops;
+  /* Each loose edge consumes two offsets; only entries still equal to `-1u` are written. */
+  for (int i = 0; i < loose_geom->edge_len; i++) {
+    const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]];
+    if (elb->data[loose_edge->v1] == -1u) {
+      elb->data[loose_edge->v1] = offset;
+    }
+    if (elb->data[loose_edge->v2] == -1u) {
+      elb->data[loose_edge->v2] = offset + 1;
+    }
+    elb->index_max += 2;
+    elb->index_len += 2;
+    offset += 2;
+  }
+  /* Each loose vertex consumes one offset, following the same rule. */
+  for (int i = 0; i < loose_geom->vert_len; i++) {
+    if (elb->data[loose_geom->verts[i]] == -1u) {
+      elb->data[loose_geom->verts[i]] = offset;
+    }
+    elb->index_max += 1;
+    elb->index_len += 1;
+    offset += 1;
+  }
+}
+
+static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache),
+                                         void *buf,
+                                         void *_userdata)
+{
+  /* Hand the accumulated point indices over to the index buffer. */
+  auto *builder = static_cast<GPUIndexBufBuilder *>(_userdata);
+  GPU_indexbuf_build_in_place(builder, static_cast<GPUIndexBuf *>(buf));
+}
+
 constexpr MeshExtract create_extractor_points()
 {
   MeshExtract extractor = {nullptr};
@@ -167,6 +236,9 @@ constexpr MeshExtract create_extractor_points()
   extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh;
   extractor.task_reduce = extract_points_task_reduce;
   extractor.finish = extract_points_finish;
+  extractor.init_subdiv = extract_points_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_points_loose_geom_subdiv;
+  extractor.finish_subdiv = extract_points_finish_subdiv;
   extractor.use_threading = true;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(GPUIndexBufBuilder);
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
index 54e733d3d86..b1ace8bc6c9 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
@@ -25,6 +25,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from)
@@ -123,10 +125,37 @@ static void extract_tris_finish(const MeshRenderData *mr,
   }
 }
 
+static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                     const MeshRenderData *UNUSED(mr),
+                                     struct MeshBatchCache *cache,
+                                     void *buffer,
+                                     void *UNUSED(data))
+{
+  /* The index buffer is already allocated; set it up so that it is filled on the device. */
+  GPUIndexBuf *tris_ibo = static_cast<GPUIndexBuf *>(buffer);
+  GPU_indexbuf_init_build_on_device(tris_ibo, subdiv_cache->num_subdiv_triangles * 3);
+
+  /* Per-material index buffers are sub-ranges of the main one, allocated here when missing. */
+  const int mat_count = (cache->tris_per_mat != nullptr) ? cache->mat_len : 0;
+  for (int mat = 0; mat < mat_count; mat++) {
+    if (!cache->tris_per_mat[mat]) {
+      cache->tris_per_mat[mat] = GPU_indexbuf_calloc();
+    }
+
+    /* Multiply by 6 since each quad is made of 2 triangles. */
+    const int start = subdiv_cache->mat_start[mat] * 6;
+    const int len = (subdiv_cache->mat_end[mat] - subdiv_cache->mat_start[mat]) * 6;
+    GPU_indexbuf_create_subrange_in_place(cache->tris_per_mat[mat], tris_ibo, start, len);
+  }
+
+  draw_subdiv_build_tris_buffer(subdiv_cache, tris_ibo, cache->mat_len);
+}
+
 constexpr MeshExtract create_extractor_tris()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_tris_init;
+  extractor.init_subdiv = extract_tris_init_subdiv;
   extractor.iter_poly_bm = extract_tris_iter_poly_bm;
   extractor.iter_poly_mesh = extract_tris_iter_poly_mesh;
   extractor.task_reduce = extract_tris_mat_task_reduce;
@@ -214,6 +243,7 @@ constexpr MeshExtract create_extractor_tris_single_mat()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_tris_single_mat_init;
+  extractor.init_subdiv = extract_tris_init_subdiv;
   extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm;
   extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh;
   extractor.task_reduce = extract_tris_mat_task_reduce;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
index 8a5a8134ca7..ea702e5efdd 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
@@ -32,6 +32,7 @@
 
 #include "BKE_attribute.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -153,7 +154,9 @@ static GPUVertCompType get_comp_type_for_type(CustomDataType type)
 
 static void init_vbo_for_attribute(const MeshRenderData *mr,
                                    GPUVertBuf *vbo,
-                                   const DRW_AttributeRequest &request)
+                                   const DRW_AttributeRequest &request,
+                                   bool build_on_device,
+                                   uint32_t len)
 {
   GPUVertCompType comp_type = get_comp_type_for_type(request.cd_type);
   GPUVertFetchMode fetch_mode = get_fetch_mode_for_type(request.cd_type);
@@ -184,8 +187,13 @@ static void init_vbo_for_attribute(const MeshRenderData *mr,
     }
   }
 
-  GPU_vertbuf_init_with_format(vbo, &format);
-  GPU_vertbuf_data_alloc(vbo, static_cast<uint32_t>(mr->loop_len));
+  if (build_on_device) {
+    GPU_vertbuf_init_build_on_device(vbo, &format, len);
+  }
+  else {
+    GPU_vertbuf_init_with_format(vbo, &format);
+    GPU_vertbuf_data_alloc(vbo, len);
+  }
 }
 
 template<typename AttributeType, typename VBOType>
@@ -309,7 +317,7 @@ static void extract_attr_init(const MeshRenderData *mr,
 
   GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
 
-  init_vbo_for_attribute(mr, vbo, request);
+  init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len));
 
   /* TODO(kevindietrich) : float3 is used for scalar attributes as the implicit conversion done by
    * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following the
@@ -346,6 +354,68 @@ static void extract_attr_init(const MeshRenderData *mr,
   }
 }
 
+static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                     const MeshRenderData *mr,
+                                     MeshBatchCache *cache,
+                                     void *buffer,
+                                     void *UNUSED(tls_data),
+                                     int index)
+{
+  const DRW_MeshAttributes *attrs_used = &cache->attr_used;
+  const DRW_AttributeRequest &request = attrs_used->requests[index];
+
+  Mesh *coarse_mesh = subdiv_cache->mesh;
+
+  const uint32_t dimensions = gpu_component_size_for_attribute_type(request.cd_type);
+  /* Prepare VBO for coarse data. The compute shader only expects floats. The format is local on
+   * purpose: it depends on `dimensions`, and a static one would gain an attribute per call. */
+  GPUVertBuf *src_data = GPU_vertbuf_calloc();
+  GPUVertFormat coarse_format = {0};
+  GPU_vertformat_attr_add(&coarse_format, "data", GPU_COMP_F32, dimensions, GPU_FETCH_FLOAT);
+  GPU_vertbuf_init_with_format_ex(src_data, &coarse_format, GPU_USAGE_STATIC);
+  GPU_vertbuf_data_alloc(src_data, static_cast<uint32_t>(coarse_mesh->totloop));
+
+  switch (request.cd_type) {
+    case CD_PROP_BOOL: {
+      extract_attr_generic<bool, float3>(mr, src_data, request);
+      break;
+    }
+    case CD_PROP_INT32: {
+      extract_attr_generic<int32_t, float3>(mr, src_data, request);
+      break;
+    }
+    case CD_PROP_FLOAT: {
+      extract_attr_generic<float, float3>(mr, src_data, request);
+      break;
+    }
+    case CD_PROP_FLOAT2: {
+      extract_attr_generic<float2>(mr, src_data, request);
+      break;
+    }
+    case CD_PROP_FLOAT3: {
+      extract_attr_generic<float3>(mr, src_data, request);
+      break;
+    }
+    case CD_PROP_COLOR: {
+      extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request);
+      break;
+    }
+    default: {
+      BLI_assert(false);
+    }
+  }
+  /* The destination buffer is initialized for building on the device from the coarse data. */
+  GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer);
+  init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops);
+
+  /* Ensure data is uploaded properly. */
+  GPU_vertbuf_tag_dirty(src_data);
+  draw_subdiv_interp_custom_data(
+      subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0);
+
+  GPU_vertbuf_discard(src_data);
+}
+
 /* Wrappers around extract_attr_init so we can pass the index of the attribute that we want to
  * extract. The overall API does not allow us to pass this in a convenient way. */
 #define EXTRACT_INIT_WRAPPER(index) \
@@ -353,6 +423,14 @@ static void extract_attr_init(const MeshRenderData *mr,
       const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \
   { \
     extract_attr_init(mr, cache, buf, tls_data, index); \
+  } \
+  static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \
+                                              const MeshRenderData *mr, \
+                                              struct MeshBatchCache *cache, \
+                                              void *buf, \
+                                              void *tls_data) \
+  { \
+    extract_attr_init_subdiv(subdiv_cache, mr, cache, buf, tls_data, index); \
   }
 
 EXTRACT_INIT_WRAPPER(0)
@@ -371,10 +449,12 @@ EXTRACT_INIT_WRAPPER(12)
 EXTRACT_INIT_WRAPPER(13)
 EXTRACT_INIT_WRAPPER(14)
 
-template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn)
+template<int index>
+constexpr MeshExtract create_extractor_attr(ExtractInitFn fn, ExtractInitSubdivFn subdiv_fn)
 {
   MeshExtract extractor = {nullptr};
   extractor.init = fn;
+  extractor.init_subdiv = subdiv_fn;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = 0;
   extractor.use_threading = false;
@@ -388,7 +468,8 @@ template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn
 
 extern "C" {
 #define CREATE_EXTRACTOR_ATTR(index) \
-  blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index)
+  blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index, \
+                                              blender::draw::extract_attr_init_subdiv##index)
 
 const MeshExtract extract_attr[GPU_MAX_ATTR] = {
     CREATE_EXTRACTOR_ATTR(0),
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
index 2e2444a8e3d..5ee34d7fdb2 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
@@ -25,6 +25,7 @@
 
 #include "GPU_capabilities.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -216,6 +217,86 @@ static void extract_edge_fac_finish(const MeshRenderData *mr,
   MEM_SAFE_FREE(data->edge_loop_count);
 }
 
+/* Vertex format of the subdivision edge factor buffer. Unlike the non-subdivision code path, the
+ * buggy AMD driver case is taken care of directly here. */
+static GPUVertFormat *get_subdiv_edge_fac_format()
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len != 0) {
+    /* Already initialized by a previous call. */
+    return &format;
+  }
+  const bool use_float = GPU_crappy_amd_driver();
+  const GPUVertCompType comp_type = use_float ? GPU_COMP_F32 : GPU_COMP_U8;
+  const GPUVertFetchMode fetch_mode = use_float ? GPU_FETCH_FLOAT : GPU_FETCH_INT_TO_FLOAT_UNIT;
+  GPU_vertformat_attr_add(&format, "wd", comp_type, 1, fetch_mode);
+  return &format;
+}
+
+static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                         const MeshRenderData *mr,
+                                         struct MeshBatchCache *cache,
+                                         void *buffer,
+                                         void *UNUSED(data))
+{
+  GPUVertBuf *requested_edge_idx = cache->final.buff.vbo.edge_idx;
+  GPUVertBuf *pos_nor_vbo = cache->final.buff.vbo.pos_nor;
+
+  /* The buffer is built on the device; it also has room for the loose geometry loops. */
+  GPUVertBuf *edge_fac_vbo = static_cast<GPUVertBuf *>(buffer);
+  GPU_vertbuf_init_build_on_device(edge_fac_vbo,
+                                   get_subdiv_edge_fac_format(),
+                                   subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+
+  /* When the edge original indices were not requested, build them in a temporary buffer. */
+  const bool use_temp_edge_idx = (requested_edge_idx == nullptr);
+  GPUVertBuf *loop_edge_idx = requested_edge_idx;
+  if (use_temp_edge_idx) {
+    loop_edge_idx = GPU_vertbuf_calloc();
+    int *edges_orig_index = static_cast<int *>(
+        GPU_vertbuf_get_data(subdiv_cache->edges_orig_index));
+    draw_subdiv_init_origindex_buffer(
+        loop_edge_idx, edges_orig_index, subdiv_cache->num_subdiv_loops, 0);
+  }
+
+  draw_subdiv_build_edge_fac_buffer(subdiv_cache, pos_nor_vbo, loop_edge_idx, edge_fac_vbo);
+
+  if (use_temp_edge_idx) {
+    /* The temporary buffer was only needed to build the edge factors. */
+    GPU_vertbuf_discard(loop_edge_idx);
+  }
+}
+
+static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                               const MeshRenderData *UNUSED(mr),
+                                               const MeshExtractLooseGeom *loose_geom,
+                                               void *buffer,
+                                               void *UNUSED(data))
+{
+  if (loose_geom->edge_len == 0) {
+    return;
+  }
+
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  /* Make sure buffer is active for sending loose data. */
+  GPU_vertbuf_use(vbo);
+  /* Loose edge factors are written after the subdivision loops, two entries per edge. */
+  uint offset = subdiv_cache->num_subdiv_loops;
+  for (int i = 0; i < loose_geom->edge_len; i++) {
+    if (GPU_crappy_amd_driver()) {
+      float loose_edge_fac[2] = {1.0f, 1.0f};
+      GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac);
+    }
+    else {
+      /* Unsigned bytes to match the `GPU_COMP_U8` format: 255 does not fit a signed `char`. */
+      const uint8_t loose_edge_fac[2] = {255, 255};
+      GPU_vertbuf_update_sub(
+          vbo, offset * sizeof(uint8_t), sizeof(loose_edge_fac), loose_edge_fac);
+    }
+    offset += 2;
+  }
+}
+
 constexpr MeshExtract create_extractor_edge_fac()
 {
   MeshExtract extractor = {nullptr};
@@ -224,6 +305,8 @@ constexpr MeshExtract create_extractor_edge_fac()
   extractor.iter_poly_mesh = extract_edge_fac_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_edge_fac_iter_ledge_bm;
   extractor.iter_ledge_mesh = extract_edge_fac_iter_ledge_mesh;
+  extractor.init_subdiv = extract_edge_fac_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_edge_fac_loose_geom_subdiv;
   extractor.finish = extract_edge_fac_finish;
   extractor.data_type = MR_DATA_POLY_NOR;
   extractor.data_size = sizeof(MeshExtract_EdgeFac_Data);
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
index 5232346e51e..eef64085c95 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
@@ -25,6 +25,8 @@
 
 #include "draw_cache_impl.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -107,19 +109,25 @@ static void mesh_render_data_vert_flag(const MeshRenderData *mr,
   }
 }
 
-static void extract_edit_data_init(const MeshRenderData *mr,
-                                   struct MeshBatchCache *UNUSED(cache),
-                                   void *buf,
-                                   void *tls_data)
+static GPUVertFormat *get_edit_data_format(void)
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
     /* WARNING: Adjust #EditLoopData struct accordingly. */
     GPU_vertformat_attr_add(&format, "data", GPU_COMP_U8, 4, GPU_FETCH_INT);
     GPU_vertformat_alias_add(&format, "flag");
   }
-  GPU_vertbuf_init_with_format(vbo, &format);
+  return &format;
+}
+
+static void extract_edit_data_init(const MeshRenderData *mr,
+                                   struct MeshBatchCache *UNUSED(cache),
+                                   void *buf,
+                                   void *tls_data)
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  GPUVertFormat *format = get_edit_data_format();
+  GPU_vertbuf_init_with_format(vbo, format);
   GPU_vertbuf_data_alloc(vbo, mr->loop_len + mr->loop_loose_len);
   EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo);
   *(EditLoopData **)tls_data = vbo_data;
@@ -240,6 +248,80 @@ static void extract_edit_data_iter_lvert_mesh(const MeshRenderData *mr,
   }
 }
 
+static void extract_edit_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                          const MeshRenderData *mr,
+                                          MeshBatchCache *UNUSED(cache),
+                                          void *buf,
+                                          void *data)
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  GPU_vertbuf_init_with_format(vbo, get_edit_data_format());
+  GPU_vertbuf_data_alloc(vbo, subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+  /* Store the VBO data pointer in the task data so the subdiv callbacks can fill it. */
+  *static_cast<EditLoopData **>(data) = static_cast<EditLoopData *>(GPU_vertbuf_get_data(vbo));
+}
+
+static void extract_edit_data_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                          const MeshRenderData *mr,
+                                          void *_data)
+{
+  /* Fill the edit flags of every subdivision loop from the original elements it maps to. */
+  EditLoopData *vbo_data = *(EditLoopData **)_data;
+  const int *vert_origindices = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+  const int *edge_origindices = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+  const int *poly_origindices = subdiv_cache->subdiv_loop_poly_index;
+
+  for (uint loop = 0; loop < subdiv_cache->num_subdiv_loops; loop++) {
+    EditLoopData *edit_loop_data = &vbo_data[loop];
+    memset(edit_loop_data, 0, sizeof(EditLoopData));
+
+    /* Vertex and edge flags are only set when the loop maps to an original vertex or edge. */
+    if (vert_origindices[loop] != -1) {
+      const BMVert *eve = bm_original_vert_get(mr, vert_origindices[loop]);
+      if (eve) {
+        mesh_render_data_vert_flag(mr, eve, edit_loop_data);
+      }
+    }
+
+    if (edge_origindices[loop] != -1) {
+      const BMEdge *eed = bm_original_edge_get(mr, edge_origindices[loop]);
+      if (eed) {
+        mesh_render_data_edge_flag(mr, eed, edit_loop_data);
+      }
+    }
+
+    /* NOTE(review): unlike the vertex and edge above, the face is used without a null check;
+     * confirm that `bm_original_face_get` cannot return null here. */
+    /* The -1 parameter is for edit_uvs, which we don't do here. */
+    BMFace *efa = bm_original_face_get(mr, poly_origindices[loop]);
+    mesh_render_data_face_flag(mr, efa, -1, edit_loop_data);
+  }
+}
+
+static void extract_edit_data_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                                const MeshRenderData *mr,
+                                                const MeshExtractLooseGeom *loose_geom,
+                                                void *UNUSED(buffer),
+                                                void *_data)
+{
+  if (loose_geom->edge_len == 0) {
+    return;
+  }
+  EditLoopData *vbo_data = *(EditLoopData **)_data;
+  for (int ledge = 0; ledge < loose_geom->edge_len; ledge++) {
+    const int offset = subdiv_cache->num_subdiv_loops + ledge * 2;
+    EditLoopData *v1_data = &vbo_data[offset];
+    EditLoopData *v2_data = v1_data + 1;
+    memset(v1_data, 0, sizeof(EditLoopData));
+    BMEdge *eed = bm_original_edge_get(mr, loose_geom->edges[ledge]);
+    /* Both entries share the edge flags; the vertex flags are specific to each end. */
+    mesh_render_data_edge_flag(mr, eed, v1_data);
+    *v2_data = *v1_data;
+    mesh_render_data_vert_flag(mr, eed->v1, v1_data);
+    mesh_render_data_vert_flag(mr, eed->v2, v2_data);
+  }
+}
+
 constexpr MeshExtract create_extractor_edit_data()
 {
   MeshExtract extractor = {nullptr};
@@ -250,6 +332,9 @@ constexpr MeshExtract create_extractor_edit_data()
   extractor.iter_ledge_mesh = extract_edit_data_iter_ledge_mesh;
   extractor.iter_lvert_bm = extract_edit_data_iter_lvert_bm;
   extractor.iter_lvert_mesh = extract_edit_data_iter_lvert_mesh;
+  extractor.init_subdiv = extract_edit_data_init_subdiv;
+  extractor.iter_subdiv = extract_edit_data_iter_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_edit_data_loose_geom_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(EditLoopData *);
   extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
index b8494428eed..067d482bc2b 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
@@ -25,6 +25,8 @@
 
 #include "draw_cache_impl.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -36,12 +38,11 @@ struct MeshExtract_EditUVData_Data {
   int cd_ofs;
 };
 
-static void extract_edituv_data_init(const MeshRenderData *mr,
-                                     struct MeshBatchCache *UNUSED(cache),
-                                     void *buf,
-                                     void *tls_data)
+static void extract_edituv_data_init_common(const MeshRenderData *mr,
+                                            GPUVertBuf *vbo,
+                                            MeshExtract_EditUVData_Data *data,
+                                            uint loop_len)
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
     /* WARNING: Adjust #EditLoopData struct accordingly. */
@@ -50,15 +51,23 @@ static void extract_edituv_data_init(const MeshRenderData *mr,
   }
 
   GPU_vertbuf_init_with_format(vbo, &format);
-  GPU_vertbuf_data_alloc(vbo, mr->loop_len);
+  GPU_vertbuf_data_alloc(vbo, loop_len);
 
   CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
-
-  MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
   data->vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo);
   data->cd_ofs = CustomData_get_offset(cd_ldata, CD_MLOOPUV);
 }
 
+static void extract_edituv_data_init(const MeshRenderData *mr,
+                                     struct MeshBatchCache *UNUSED(cache),
+                                     void *buf,
+                                     void *tls_data)
+{
+  /* Regular extraction: the buffer holds one entry per loop of the mesh. */
+  auto *uv_data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
+  extract_edituv_data_init_common(mr, static_cast<GPUVertBuf *>(buf), uv_data, mr->loop_len);
+}
+
 static void extract_edituv_data_iter_poly_bm(const MeshRenderData *mr,
                                              const BMFace *f,
                                              const int UNUSED(f_index),
@@ -119,12 +128,54 @@ static void extract_edituv_data_iter_poly_mesh(const MeshRenderData *mr,
   }
 }
 
+static void extract_edituv_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *mr,
+                                            MeshBatchCache *UNUSED(cache),
+                                            void *buf,
+                                            void *tls_data)
+{
+  auto *uv_data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
+  GPUVertBuf *uv_vbo = static_cast<GPUVertBuf *>(buf); /* Sized below for subdivision loops. */
+  extract_edituv_data_init_common(mr, uv_vbo, uv_data, subdiv_cache->num_subdiv_loops);
+}
+
+static void extract_edituv_data_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *mr,
+                                            void *_data)
+{
+  MeshExtract_EditUVData_Data *uv_data = static_cast<MeshExtract_EditUVData_Data *>(_data);
+  const int *vert_origindices = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+  const int *edge_origindices = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+  const int *poly_origindices = subdiv_cache->subdiv_loop_poly_index;
+
+  for (uint loop = 0; loop < subdiv_cache->num_subdiv_loops; loop++) {
+    /* Every entry starts cleared. */
+    EditLoopData *edit_loop_data = &uv_data->vbo_data[loop];
+    memset(edit_loop_data, 0, sizeof(EditLoopData));
+
+    BMFace *efa = bm_original_face_get(mr, poly_origindices[loop]);
+    if (vert_origindices[loop] == -1 || edge_origindices[loop] == -1) {
+      /* Flags are only set for loops mapped to both an original vertex and edge. */
+      continue;
+    }
+
+    /* NOTE(review): the face, edge and shared loop are used without null checks; confirm the
+     * helpers called here cannot return null in this code path. */
+    BMEdge *eed = bm_original_edge_get(mr, edge_origindices[loop]);
+    BMLoop *l = BM_face_edge_share_loop(efa, eed);
+    mesh_render_data_loop_flag(mr, l, uv_data->cd_ofs, edit_loop_data);
+    mesh_render_data_loop_edge_flag(mr, l, uv_data->cd_ofs, edit_loop_data);
+  }
+}
+
 constexpr MeshExtract create_extractor_edituv_data()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_edituv_data_init;
   extractor.iter_poly_bm = extract_edituv_data_iter_poly_bm;
   extractor.iter_poly_mesh = extract_edituv_data_iter_poly_mesh;
+  extractor.init_subdiv = extract_edituv_data_init_subdiv;
+  extractor.iter_subdiv = extract_edituv_data_iter_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_EditUVData_Data);
   extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
index a947d98f955..0ea4ef5d5db 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
@@ -27,6 +27,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -213,12 +215,69 @@ static void extract_edituv_stretch_angle_iter_poly_mesh(const MeshRenderData *mr
   }
 }
 
+static GPUVertFormat *get_edituv_stretch_angle_format_subdiv()
+{
+  static GPUVertFormat format = {0}; /* Attributes are only added while `attr_len` is 0. */
+  if (format.attr_len == 0) {
+    /* Warning: adjust #UVStretchAngle struct accordingly. */
+    GPU_vertformat_attr_add(&format, "angle", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+    GPU_vertformat_attr_add(&format, "uv_angles", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+  }
+  return &format;
+}
+
+static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                                     const MeshRenderData *mr,
+                                                     struct MeshBatchCache *cache,
+                                                     void *buffer,
+                                                     void *UNUSED(tls_data))
+{
+  /* The stretch angle buffer is built on the device from the positions and UVs buffers. */
+  GPUVertBuf *refined_vbo = static_cast<GPUVertBuf *>(buffer);
+  GPU_vertbuf_init_build_on_device(
+      refined_vbo, get_edituv_stretch_angle_format_subdiv(), subdiv_cache->num_subdiv_loops);
+
+  GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor;
+  GPUVertBuf *uvs = cache->final.buff.vbo.uv;
+
+  /* UVs are stored contiguously, so the offset of the active UV layer within the UVs buffer
+   * has to be computed from the layers in use. */
+  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_MESH) ? &mr->me->ldata : &mr->bm->ldata;
+  const int active_layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
+
+  uint32_t used_layers = cache->cd_used.uv;
+  /* HACK to fix T68857 */
+  if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1 &&
+      active_layer != -1) {
+    used_layers |= (1 << active_layer);
+  }
+
+  /* Count the used layers found before the active one; unused layers never stop the search. */
+  int uvs_offset = 0;
+  for (int layer = 0; layer < MAX_MTFACE; layer++) {
+    if ((used_layers & (1 << layer)) == 0) {
+      continue;
+    }
+    if (layer == active_layer) {
+      break;
+    }
+    uvs_offset += 1;
+  }
+
+  /* The data is at `offset * num loops`, and we have 2 values per index. */
+  uvs_offset *= subdiv_cache->num_subdiv_loops * 2;
+
+  draw_subdiv_build_edituv_stretch_angle_buffer(
+      subdiv_cache, pos_nor, uvs, uvs_offset, refined_vbo);
+}
+
 constexpr MeshExtract create_extractor_edituv_edituv_stretch_angle()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_edituv_stretch_angle_init;
   extractor.iter_poly_bm = extract_edituv_stretch_angle_iter_poly_bm;
   extractor.iter_poly_mesh = extract_edituv_stretch_angle_iter_poly_mesh;
+  extractor.init_subdiv = extract_edituv_stretch_angle_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_StretchAngle_Data);
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
index 3db8cd79af5..3b40b3115f5 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
@@ -27,6 +27,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -63,14 +65,12 @@ BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_t
   return (ratio > 1.0f) ? (1.0f / ratio) : ratio;
 }
 
-static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
-                                               struct MeshBatchCache *cache,
-                                               void *buf,
-                                               void *UNUSED(data))
+static void compute_area_ratio(const MeshRenderData *mr,
+                               float *r_area_ratio,
+                               float &r_tot_area,
+                               float &r_tot_uv_area)
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
   float tot_area = 0.0f, tot_uv_area = 0.0f;
-  float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__));
 
   if (mr->extract_type == MR_EXTRACT_BMESH) {
     CustomData *cd_ldata = &mr->bm->ldata;
@@ -84,7 +84,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
       float uvarea = BM_face_calc_area_uv(efa, uv_ofs);
       tot_area += area;
       tot_uv_area += uvarea;
-      area_ratio[f] = area_ratio_get(area, uvarea);
+      r_area_ratio[f] = area_ratio_get(area, uvarea);
     }
   }
   else {
@@ -96,12 +96,22 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
       float uvarea = BKE_mesh_calc_poly_uv_area(mp, uv_data);
       tot_area += area;
       tot_uv_area += uvarea;
-      area_ratio[mp_index] = area_ratio_get(area, uvarea);
+      r_area_ratio[mp_index] = area_ratio_get(area, uvarea);
     }
   }
 
-  cache->tot_area = tot_area;
-  cache->tot_uv_area = tot_uv_area;
+  r_tot_area = tot_area;
+  r_tot_uv_area = tot_uv_area;
+}
+
+static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
+                                               struct MeshBatchCache *cache,
+                                               void *buf,
+                                               void *UNUSED(data))
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__));
+  compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area);
 
   /* Convert in place to avoid an extra allocation */
   uint16_t *poly_stretch = (uint16_t *)area_ratio;
@@ -135,11 +145,46 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
   MEM_freeN(area_ratio);
 }
 
+static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                                    const MeshRenderData *mr,
+                                                    struct MeshBatchCache *cache,
+                                                    void *buffer,
+                                                    void *UNUSED(data))
+{
+  /* A single float named "ratio" per element, used by both buffers below. */
+  static GPUVertFormat ratio_format = {0};
+  if (ratio_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&ratio_format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+  }
+
+  /* Final buffer: one element per subdivided loop. */
+  GPUVertBuf *subdiv_ratios = static_cast<GPUVertBuf *>(buffer);
+  GPU_vertbuf_init_build_on_device(subdiv_ratios, &ratio_format, subdiv_cache->num_subdiv_loops);
+
+  /* Coarse buffer: temporary storage written from the CPU. It shares the format above as the
+   * data is only copied around.
+   * NOTE(review): allocated with `loop_len` elements although the non-subdivision path sizes
+   * the same ratios with `poly_len`; possibly an over-allocation -- confirm. */
+  GPUVertBuf *coarse_ratios = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format(coarse_ratios, &ratio_format);
+  GPU_vertbuf_data_alloc(coarse_ratios, mr->loop_len);
+
+  /* Computes the ratios of the coarse mesh and stores both area totals in the batch cache. */
+  float *coarse_ratio_data = static_cast<float *>(GPU_vertbuf_get_data(coarse_ratios));
+  compute_area_ratio(mr, coarse_ratio_data, cache->tot_area, cache->tot_uv_area);
+
+  draw_subdiv_build_edituv_stretch_area_buffer(subdiv_cache, coarse_ratios, subdiv_ratios);
+
+  /* Only the final buffer is kept. */
+  GPU_vertbuf_discard(coarse_ratios);
+}
+
 constexpr MeshExtract create_extractor_edituv_stretch_area()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_edituv_stretch_area_init;
   extractor.finish = extract_edituv_stretch_area_finish;
+  extractor.init_subdiv = extract_edituv_stretch_area_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = 0;
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
index 33f9180e122..f65159f9b95 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
@@ -23,24 +23,40 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
 /** \name Extract Face-dots positions
  * \{ */
 
-static void extract_fdots_pos_init(const MeshRenderData *mr,
-                                   struct MeshBatchCache *UNUSED(cache),
-                                   void *buf,
-                                   void *tls_data)
+static GPUVertFormat *get_fdots_pos_format()
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
     GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
   }
+  return &format;
+}
+
+static GPUVertFormat *get_fdots_nor_format_subdiv()
+{
+  static GPUVertFormat nor_format = {0}; /* Only filled in while it has no attribute. */
+  if (nor_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&nor_format, "norAndFlag", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+  }
+  return &nor_format;
+}
 
-  GPU_vertbuf_init_with_format(vbo, &format);
+static void extract_fdots_pos_init(const MeshRenderData *mr,
+                                   struct MeshBatchCache *UNUSED(cache),
+                                   void *buf,
+                                   void *tls_data)
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  GPUVertFormat *format = get_fdots_pos_format();
+  GPU_vertbuf_init_with_format(vbo, format);
   GPU_vertbuf_data_alloc(vbo, mr->poly_len);
   void *vbo_data = GPU_vertbuf_get_data(vbo);
   *(float(**)[3])tls_data = static_cast<float(*)[3]>(vbo_data);
@@ -97,10 +113,30 @@ static void extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr,
   }
 }
 
+static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                      const MeshRenderData *UNUSED(mr),
+                                      struct MeshBatchCache *cache,
+                                      void *buffer,
+                                      void *UNUSED(data))
+{
+  /* Positions, normals and indices of the face-dots are all built by the single call below. */
+  GPUVertBuf *pos_vbo = static_cast<GPUVertBuf *>(buffer);
+  GPUVertBuf *nor_vbo = cache->final.buff.vbo.fdots_nor;
+  GPUIndexBuf *dots_ibo = cache->final.buff.ibo.fdots;
+
+  GPU_vertbuf_init_build_on_device(
+      nor_vbo, get_fdots_nor_format_subdiv(), subdiv_cache->num_coarse_poly);
+  GPU_vertbuf_init_build_on_device(
+      pos_vbo, get_fdots_pos_format(), subdiv_cache->num_coarse_poly);
+  GPU_indexbuf_init_build_on_device(dots_ibo, subdiv_cache->num_coarse_poly);
+  draw_subdiv_build_fdots_buffers(subdiv_cache, pos_vbo, nor_vbo, dots_ibo);
+}
+
 constexpr MeshExtract create_extractor_fdots_pos()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_fdots_pos_init;
+  extractor.init_subdiv = extract_fdots_init_subdiv;
   extractor.iter_poly_bm = extract_fdots_pos_iter_poly_bm;
   extractor.iter_poly_mesh = extract_fdots_pos_iter_poly_mesh;
   extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
index 3c3ac7a7a0a..d30c38ef050 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
@@ -23,6 +23,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -107,10 +109,34 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr,
   }
 }
 
+static GPUVertFormat *get_subdiv_lnor_format()
+{
+  static GPUVertFormat lnor_format = {0}; /* Only filled in while it has no attribute. */
+  if (lnor_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&lnor_format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+    GPU_vertformat_alias_add(&lnor_format, "lnor");
+  }
+  return &lnor_format;
+}
+
+static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                     const MeshRenderData *UNUSED(mr),
+                                     struct MeshBatchCache *cache,
+                                     void *buffer,
+                                     void *UNUSED(data))
+{
+  GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; /* Handed to the build call below. */
+  BLI_assert(pos_nor);
+  GPUVertBuf *dst = static_cast<GPUVertBuf *>(buffer);
+  GPU_vertbuf_init_build_on_device(dst, get_subdiv_lnor_format(), subdiv_cache->num_subdiv_loops);
+  draw_subdiv_build_lnor_buffer(subdiv_cache, pos_nor, dst);
+}
+
 constexpr MeshExtract create_extractor_lnor()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_lnor_init;
+  extractor.init_subdiv = extract_lnor_init_subdiv;
   extractor.iter_poly_bm = extract_lnor_iter_poly_bm;
   extractor.iter_poly_mesh = extract_lnor_iter_poly_mesh;
   extractor.data_type = MR_DATA_LOOP_NOR;
@@ -210,6 +236,7 @@ constexpr MeshExtract create_extractor_lnor_hq()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_lnor_hq_init;
+  extractor.init_subdiv = extract_lnor_init_subdiv;
   extractor.iter_poly_bm = extract_lnor_hq_iter_poly_bm;
   extractor.iter_poly_mesh = extract_lnor_hq_iter_poly_mesh;
   extractor.data_type = MR_DATA_LOOP_NOR;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
index eb9a138590c..00ed4ca6359 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
@@ -25,6 +25,8 @@
 
 #include "extract_mesh.h"
 
+#include "draw_subdivision.h"
+
 namespace blender::draw {
 
 /* ---------------------------------------------------------------------- */
@@ -194,6 +196,123 @@ static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr),
   MEM_freeN(data->normals);
 }
 
+static GPUVertFormat *get_pos_nor_format()
+{
+  static GPUVertFormat pos_nor_format = {0}; /* Only filled in while it has no attribute. */
+  if (pos_nor_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&pos_nor_format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+    GPU_vertformat_attr_add(&pos_nor_format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+    GPU_vertformat_alias_add(&pos_nor_format, "vnor");
+  }
+  return &pos_nor_format;
+}
+
+static GPUVertFormat *get_normals_format()
+{
+  static GPUVertFormat normals_format = {0}; /* Only filled in while it has no attribute. */
+  if (normals_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&normals_format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+    GPU_vertformat_alias_add(&normals_format, "lnor");
+  }
+  return &normals_format;
+}
+
+static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                        const MeshRenderData *mr,
+                                        struct MeshBatchCache *UNUSED(cache),
+                                        void *buffer,
+                                        void *UNUSED(data))
+{
+  GPUVertBuf *pos_nor_vbo = static_cast<GPUVertBuf *>(buffer);
+  const bool use_limit_normals = subdiv_cache->do_limit_normals;
+
+  /* The buffer object already exists; it only gets its format and length here. The length
+   * includes `loop_loose_len` elements after the subdivided loops for loose geometry. */
+  GPU_vertbuf_init_build_on_device(
+      pos_nor_vbo, get_pos_nor_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+  draw_subdiv_extract_pos_nor(subdiv_cache, pos_nor_vbo, use_limit_normals);
+
+  if (use_limit_normals) {
+    return;
+  }
+
+  /* Vertex normals cannot be evaluated using the limit surface, so they are computed manually
+   * in two passes: accumulate, then finalize. */
+  GPUVertBuf *loop_vert_index = draw_subdiv_build_origindex_buffer(
+      subdiv_cache->subdiv_loop_subdiv_vert_index, subdiv_cache->num_subdiv_loops);
+  GPUVertBuf *accum_normals = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_build_on_device(
+      accum_normals, get_normals_format(), subdiv_cache->num_subdiv_verts);
+  draw_subdiv_accumulate_normals(subdiv_cache,
+                                 pos_nor_vbo,
+                                 subdiv_cache->subdiv_vertex_face_adjacency_offsets,
+                                 subdiv_cache->subdiv_vertex_face_adjacency,
+                                 accum_normals);
+  draw_subdiv_finalize_normals(subdiv_cache, accum_normals, loop_vert_index, pos_nor_vbo);
+
+  GPU_vertbuf_discard(accum_normals);
+  GPU_vertbuf_discard(loop_vert_index);
+}
+
+/* Append the loose edges and vertices of the coarse mesh to the position/normal buffer.
+ * They are written after the `num_subdiv_loops` elements of the subdivided loops: two
+ * elements per loose edge first, then one element per loose vertex. */
+static void extract_pos_nor_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                              const MeshRenderData *UNUSED(mr),
+                                              const MeshExtractLooseGeom *loose_geom,
+                                              void *buffer,
+                                              void *UNUSED(data))
+{
+  /* Nothing to append. */
+  if (loose_geom->edge_len + loose_geom->vert_len == 0) {
+    return;
+  }
+
+  /* Layout of one element as it is written below: seven 32-bit floats. */
+  /* TODO(kevindietrich): replace this when compressed normals are supported. */
+  struct SubdivPosNorLoop {
+    float pos[3];
+    float nor[3];
+    float flag;
+  };
+
+  /* Build one element from a coarse vertex: the position is copied, the normal is converted
+   * from shorts to floats, and the flag is always zero. */
+  auto loop_from_vert = [](const MVert &vert) {
+    SubdivPosNorLoop loop;
+    copy_v3_v3(loop.pos, vert.co);
+    normal_short_to_float_v3(loop.nor, vert.no);
+    loop.flag = 0.0f;
+    return loop;
+  };
+
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  const Mesh *coarse_mesh = subdiv_cache->mesh;
+  const MEdge *coarse_edges = coarse_mesh->medge;
+  const MVert *coarse_verts = coarse_mesh->mvert;
+
+  /* Byte size of one element, and byte position of the next element to write. */
+  const size_t stride = sizeof(SubdivPosNorLoop);
+  size_t dst = subdiv_cache->num_subdiv_loops * stride;
+
+  /* Loose edges: one element for each of the two vertices of the edge. */
+  for (int e = 0; e < loose_geom->edge_len; e++) {
+    const MEdge &edge = coarse_edges[loose_geom->edges[e]];
+    SubdivPosNorLoop pair[2];
+    pair[0] = loop_from_vert(coarse_verts[edge.v1]);
+    pair[1] = loop_from_vert(coarse_verts[edge.v2]);
+    GPU_vertbuf_update_sub(vbo, dst, 2 * stride, &pair);
+    dst += 2 * stride;
+  }
+
+  /* Loose vertices: one element each. */
+  for (int v = 0; v < loose_geom->vert_len; v++) {
+    SubdivPosNorLoop single = loop_from_vert(coarse_verts[loose_geom->verts[v]]);
+    GPU_vertbuf_update_sub(vbo, dst, stride, &single);
+    dst += stride;
+  }
+}
+
 constexpr MeshExtract create_extractor_pos_nor()
 {
   MeshExtract extractor = {nullptr};
@@ -205,6 +324,8 @@ constexpr MeshExtract create_extractor_pos_nor()
   extractor.iter_lvert_bm = extract_pos_nor_iter_lvert_bm;
   extractor.iter_lvert_mesh = extract_pos_nor_iter_lvert_mesh;
   extractor.finish = extract_pos_nor_finish;
+  extractor.init_subdiv = extract_pos_nor_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_pos_nor_loose_geom_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(MeshExtract_PosNor_Data);
   extractor.use_threading = true;
@@ -391,6 +512,7 @@ constexpr MeshExtract create_extractor_pos_nor_hq()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_pos_nor_hq_init;
+  extractor.init_subdiv = extract_pos_nor_init_subdiv;
   extractor.iter_poly_bm = extract_pos_nor_hq_iter_poly_bm;
   extractor.iter_poly_mesh = extract_pos_nor_hq_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_pos_nor_hq_iter_ledge_bm;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
index fd91bc5258f..753fbe7e0e2 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
@@ -27,6 +27,7 @@
 
 #include "BKE_paint.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -35,13 +36,23 @@ namespace blender::draw {
 /** \name Extract Sculpt Data
  * \{ */
 
+static GPUVertFormat *get_sculpt_data_format()
+{
+  static GPUVertFormat sculpt_format = {0}; /* Only filled in while it has no attribute. */
+  if (sculpt_format.attr_len == 0) {
+    GPU_vertformat_attr_add(&sculpt_format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+    GPU_vertformat_attr_add(&sculpt_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+  }
+  return &sculpt_format;
+}
+
 static void extract_sculpt_data_init(const MeshRenderData *mr,
                                      struct MeshBatchCache *UNUSED(cache),
                                      void *buf,
                                      void *UNUSED(tls_data))
 {
   GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
-  GPUVertFormat format = {0};
+  GPUVertFormat *format = get_sculpt_data_format();
 
   CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
   CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->vdata : &mr->me->vdata;
@@ -50,12 +61,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr,
   float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK);
   int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS);
 
-  if (format.attr_len == 0) {
-    GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
-    GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
-  }
-
-  GPU_vertbuf_init_with_format(vbo, &format);
+  GPU_vertbuf_init_with_format(vbo, format);
   GPU_vertbuf_data_alloc(vbo, mr->loop_len);
 
   struct gpuSculptData {
@@ -121,10 +127,99 @@ static void extract_sculpt_data_init(const MeshRenderData *mr,
   }
 }
 
+static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                            const MeshRenderData *mr,
+                                            struct MeshBatchCache *UNUSED(cache),
+                                            void *buffer,
+                                            void *UNUSED(data))
+{
+  /* Builds the sculpt data of the subdivided mesh: the paint mask of the coarse mesh is
+   * interpolated, the face set color of each subdivided loop comes from its coarse polygon. */
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  Mesh *coarse_mesh = mr->me;
+  CustomData *cd_vdata = &coarse_mesh->vdata;
+  CustomData *cd_pdata = &coarse_mesh->pdata;
+
+  /* First, interpolate mask if available. Both buffers stay null without a mask layer. */
+  GPUVertBuf *mask_vbo = nullptr;
+  GPUVertBuf *subdiv_mask_vbo = nullptr;
+  float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK);
+  if (cd_mask) {
+    GPUVertFormat mask_format = {0};
+    GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+
+    mask_vbo = GPU_vertbuf_calloc();
+    GPU_vertbuf_init_with_format(mask_vbo, &mask_format);
+    GPU_vertbuf_data_alloc(mask_vbo, coarse_mesh->totloop);
+    float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo));
+
+    /* The mask is stored per vertex: write one value per loop, visiting polygons in order. */
+    for (int i = 0; i < coarse_mesh->totpoly; i++) {
+      const MPoly *mpoly = &coarse_mesh->mpoly[i];
+      for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop;
+           loop_index++) {
+        const MLoop *ml = &coarse_mesh->mloop[loop_index];
+        *v_mask++ = cd_mask[ml->v];
+      }
+    }
+
+    subdiv_mask_vbo = GPU_vertbuf_calloc();
+    GPU_vertbuf_init_build_on_device(
+        subdiv_mask_vbo, &mask_format, subdiv_cache->num_subdiv_loops);
+
+    draw_subdiv_interp_custom_data(subdiv_cache, mask_vbo, subdiv_mask_vbo, 1, 0);
+  }
+
+  /* Then, gather face sets. */
+  GPUVertFormat face_set_format = {0};
+  GPU_vertformat_attr_add(&face_set_format, "msk", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+
+  GPUVertBuf *face_set_vbo = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format(face_set_vbo, &face_set_format);
+  GPU_vertbuf_data_alloc(face_set_vbo, subdiv_cache->num_subdiv_loops);
+
+  struct gpuFaceSet {
+    uint8_t color[4];
+  };
+
+  gpuFaceSet *face_sets = (gpuFaceSet *)GPU_vertbuf_get_data(face_set_vbo);
+  int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS);
+
+  GPUVertFormat *format = get_sculpt_data_format();
+  GPU_vertbuf_init_build_on_device(vbo, format, subdiv_cache->num_subdiv_loops);
+  int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+  for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) {
+    const int mp_index = subdiv_loop_poly_index[i];
+    uchar face_set_color[4] = {UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX};
+    if (cd_face_set) {
+      const int face_set_id = cd_face_set[mp_index];
+      /* Skip for the default color Face Set to render it white. */
+      if (face_set_id != coarse_mesh->face_sets_color_default) {
+        BKE_paint_face_set_overlay_color_get(
+            face_set_id, coarse_mesh->face_sets_color_seed, face_set_color);
+      }
+    }
+    /* Copy all four bytes so that no byte of the element is left unwritten. */
+    copy_v4_v4_uchar(face_sets->color, face_set_color);
+    face_sets++;
+  }
+
+  /* Finally, interleave mask and face sets (`subdiv_mask_vbo` is null without a mask). */
+  draw_subdiv_build_sculpt_data_buffer(subdiv_cache, subdiv_mask_vbo, face_set_vbo, vbo);
+
+  if (mask_vbo) {
+    GPU_vertbuf_discard(mask_vbo);
+    GPU_vertbuf_discard(subdiv_mask_vbo);
+  }
+  GPU_vertbuf_discard(face_set_vbo);
+}
+
 constexpr MeshExtract create_extractor_sculpt_data()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_sculpt_data_init;
+  extractor.init_subdiv = extract_sculpt_data_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = 0;
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
index 5ac30dd3be9..33c27b45627 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
@@ -21,6 +21,7 @@
  * \ingroup draw
  */
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -196,12 +197,104 @@ static void extract_vert_idx_iter_lvert_mesh(const MeshRenderData *mr,
   (*(uint32_t **)data)[offset + lvert_index] = v_orig;
 }
 
+static void extract_vert_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                         const MeshRenderData *mr,
+                                         MeshBatchCache *UNUSED(cache),
+                                         void *buf,
+                                         void *UNUSED(data))
+{
+  /* One vertex index per subdivided loop, each pointing to an element in the ibo.points.
+   * `loop_loose_len` is passed as the number of extra entries for the loose geometry. */
+  draw_subdiv_init_origindex_buffer(static_cast<GPUVertBuf *>(buf),
+                                    subdiv_cache->subdiv_loop_subdiv_vert_index,
+                                    subdiv_cache->num_subdiv_loops,
+                                    mr->loop_loose_len);
+}
+
+/* Write the coarse vertex indices of the loose geometry into the vertex index buffer, after
+ * the `num_subdiv_loops` entries of the subdivided loops: both vertices of every loose edge
+ * first, then every loose vertex. */
+static void extract_vert_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                               const MeshRenderData *UNUSED(mr),
+                                               const MeshExtractLooseGeom *loose_geom,
+                                               void *buffer,
+                                               void *UNUSED(data))
+{
+  /* Nothing to write. */
+  if (loose_geom->edge_len + loose_geom->vert_len == 0) {
+    return;
+  }
+
+  /* `dst` is the write cursor, it starts right after the entries of the subdivided loops. */
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  uint *dst = (uint *)GPU_vertbuf_get_data(vbo) + subdiv_cache->num_subdiv_loops;
+  const MEdge *coarse_edges = subdiv_cache->mesh->medge;
+
+  for (int e = 0; e < loose_geom->edge_len; e++) {
+    const MEdge &edge = coarse_edges[loose_geom->edges[e]];
+    *dst++ = edge.v1;
+    *dst++ = edge.v2;
+  }
+
+  for (int v = 0; v < loose_geom->vert_len; v++) {
+    *dst++ = loose_geom->verts[v];
+  }
+}
+
+static void extract_edge_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                         const MeshRenderData *mr,
+                                         MeshBatchCache *UNUSED(cache),
+                                         void *buf,
+                                         void *UNUSED(data))
+{
+  /* Two extra entries are requested for every loose edge. */
+  int *orig_index = static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index));
+  draw_subdiv_init_origindex_buffer(static_cast<GPUVertBuf *>(buf),
+                                    orig_index,
+                                    subdiv_cache->num_subdiv_loops,
+                                    mr->edge_loose_len * 2);
+}
+
+static void extract_edge_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+                                               const MeshRenderData *UNUSED(mr),
+                                               const MeshExtractLooseGeom *loose_geom,
+                                               void *buffer,
+                                               void *UNUSED(data))
+{
+  /* Only loose edges get entries here (two per edge, after the subdivided loops). */
+  if (loose_geom->edge_len == 0) {
+    return;
+  }
+
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  uint *edge_idx_data = (uint *)GPU_vertbuf_get_data(vbo);
+  uint offset = subdiv_cache->num_subdiv_loops;
+
+  for (int i = 0; i < loose_geom->edge_len; i++) {
+    edge_idx_data[offset] = loose_geom->edges[i];
+    edge_idx_data[offset + 1] = loose_geom->edges[i];
+    offset += 2;
+  }
+}
+
+static void extract_poly_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                         const MeshRenderData *UNUSED(mr),
+                                         MeshBatchCache *UNUSED(cache),
+                                         void *buf,
+                                         void *UNUSED(data))
+{
+  GPUVertBuf *poly_idx_vbo = static_cast<GPUVertBuf *>(buf); /* No extra entries requested. */
+  draw_subdiv_init_origindex_buffer(
+      poly_idx_vbo, subdiv_cache->subdiv_loop_poly_index, subdiv_cache->num_subdiv_loops, 0);
+}
+
 constexpr MeshExtract create_extractor_poly_idx()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_select_idx_init;
   extractor.iter_poly_bm = extract_poly_idx_iter_poly_bm;
   extractor.iter_poly_mesh = extract_poly_idx_iter_poly_mesh;
+  extractor.init_subdiv = extract_poly_idx_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(uint32_t *);
   extractor.use_threading = true;
@@ -217,6 +310,8 @@ constexpr MeshExtract create_extractor_edge_idx()
   extractor.iter_poly_mesh = extract_edge_idx_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_edge_idx_iter_ledge_bm;
   extractor.iter_ledge_mesh = extract_edge_idx_iter_ledge_mesh;
+  extractor.init_subdiv = extract_edge_idx_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_edge_idx_loose_geom_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(uint32_t *);
   extractor.use_threading = true;
@@ -234,6 +329,8 @@ constexpr MeshExtract create_extractor_vert_idx()
   extractor.iter_ledge_mesh = extract_vert_idx_iter_ledge_mesh;
   extractor.iter_lvert_bm = extract_vert_idx_iter_lvert_bm;
   extractor.iter_lvert_mesh = extract_vert_idx_iter_lvert_mesh;
+  extractor.init_subdiv = extract_vert_idx_init_subdiv;
+  extractor.iter_loose_geom_subdiv = extract_vert_idx_loose_geom_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(uint32_t *);
   extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
index af279b08a59..6e9d8ef6926 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
@@ -23,6 +23,7 @@
 
 #include "BLI_string.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -31,25 +32,27 @@ namespace blender::draw {
 /** \name Extract UV  layers
  * \{ */
 
-static void extract_uv_init(const MeshRenderData *mr,
-                            struct MeshBatchCache *cache,
-                            void *buf,
-                            void *UNUSED(tls_data))
+/* Initialize the vertex format to be used for UVs. Return true if any UV layer is
+ * found, false otherwise. */
+static bool mesh_extract_uv_format_init(GPUVertFormat *format,
+                                        struct MeshBatchCache *cache,
+                                        CustomData *cd_ldata,
+                                        eMRExtractType extract_type,
+                                        uint32_t &r_uv_layers)
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
-  GPUVertFormat format = {0};
-  GPU_vertformat_deinterleave(&format);
+  GPU_vertformat_deinterleave(format);
 
-  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
   uint32_t uv_layers = cache->cd_used.uv;
   /* HACK to fix T68857 */
-  if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) {
+  if (extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) {
     int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
     if (layer != -1) {
       uv_layers |= (1 << layer);
     }
   }
 
+  r_uv_layers = uv_layers;
+
   for (int i = 0; i < MAX_MTFACE; i++) {
     if (uv_layers & (1 << i)) {
       char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
@@ -58,30 +61,47 @@ static void extract_uv_init(const MeshRenderData *mr,
       GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
       /* UV layer name. */
       BLI_snprintf(attr_name, sizeof(attr_name), "u%s", attr_safe_name);
-      GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+      GPU_vertformat_attr_add(format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
       /* Auto layer name. */
       BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name);
-      GPU_vertformat_alias_add(&format, attr_name);
+      GPU_vertformat_alias_add(format, attr_name);
       /* Active render layer name. */
       if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPUV)) {
-        GPU_vertformat_alias_add(&format, "u");
+        GPU_vertformat_alias_add(format, "u");
       }
       /* Active display layer name. */
       if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) {
-        GPU_vertformat_alias_add(&format, "au");
+        GPU_vertformat_alias_add(format, "au");
         /* Alias to `pos` for edit uvs. */
-        GPU_vertformat_alias_add(&format, "pos");
+        GPU_vertformat_alias_add(format, "pos");
       }
       /* Stencil mask uv layer name. */
       if (i == CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV)) {
-        GPU_vertformat_alias_add(&format, "mu");
+        GPU_vertformat_alias_add(format, "mu");
       }
     }
   }
 
+  if (format->attr_len == 0) {
+    GPU_vertformat_attr_add(format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+    return false;
+  }
+
+  return true;
+}
+
+static void extract_uv_init(const MeshRenderData *mr,
+                            struct MeshBatchCache *cache,
+                            void *buf,
+                            void *UNUSED(tls_data))
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  GPUVertFormat format = {0};
+
+  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
   int v_len = mr->loop_len;
-  if (format.attr_len == 0) {
-    GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+  uint32_t uv_layers = cache->cd_used.uv;
+  if (!mesh_extract_uv_format_init(&format, cache, cd_ldata, mr->extract_type, uv_layers)) {
     /* VBO will not be used, only allocate minimum of memory. */
     v_len = 1;
   }
@@ -116,10 +136,45 @@ static void extract_uv_init(const MeshRenderData *mr,
   }
 }
 
+/* Subdivision variant of the UV extractor: the VBO is initialized with
+ * #GPU_vertbuf_init_build_on_device, then #draw_subdiv_extract_uvs is called per used UV layer. */
+static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                   const MeshRenderData *UNUSED(mr),
+                                   struct MeshBatchCache *cache,
+                                   void *buffer,
+                                   void *UNUSED(data))
+{
+  Mesh *coarse_mesh = subdiv_cache->mesh;
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+  GPUVertFormat format = {0};
+  uint v_len = subdiv_cache->num_subdiv_loops;
+  /* Seed with the cached mask like #extract_uv_init; never pass an indeterminate value. */
+  uint32_t uv_layers = cache->cd_used.uv;
+  if (!mesh_extract_uv_format_init(
+          &format, cache, &coarse_mesh->ldata, MR_EXTRACT_MESH, uv_layers)) {
+    // TODO(kevindietrich): handle this more gracefully.
+    v_len = 1;
+  }
+  GPU_vertbuf_init_build_on_device(vbo, &format, v_len);
+  if (uv_layers == 0) {
+    return;
+  }
+
+  /* Index of the UV layer in the compact buffer. Used UV layers are stored in a single buffer. */
+  int pack_layer_index = 0;
+  for (int i = 0; i < MAX_MTFACE; i++) {
+    if (uv_layers & (1 << i)) {
+      const int offset = (int)subdiv_cache->num_subdiv_loops * pack_layer_index++;
+      draw_subdiv_extract_uvs(subdiv_cache, vbo, i, offset);
+    }
+  }
+}
+
 constexpr MeshExtract create_extractor_uv()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_uv_init;
+  extractor.init_subdiv = extract_uv_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = 0;
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
index f8878eb2617..ea7810bcf6b 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
@@ -25,6 +25,7 @@
 
 #include "BLI_string.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -33,17 +34,14 @@ namespace blender::draw {
 /** \name Extract VCol
  * \{ */
 
-static void extract_vcol_init(const MeshRenderData *mr,
-                              struct MeshBatchCache *cache,
-                              void *buf,
-                              void *UNUSED(tls_data))
+/* Initialize the common vertex format for vcol for coarse and subdivided meshes. */
+static void init_vcol_format(GPUVertFormat *format,
+                             const MeshBatchCache *cache,
+                             CustomData *cd_ldata)
 {
-  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
-  GPUVertFormat format = {0};
-  GPU_vertformat_deinterleave(&format);
+  GPU_vertformat_deinterleave(format);
 
-  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
-  uint32_t vcol_layers = cache->cd_used.vcol;
+  const uint32_t vcol_layers = cache->cd_used.vcol;
 
   for (int i = 0; i < MAX_MCOL; i++) {
     if (vcol_layers & (1 << i)) {
@@ -52,31 +50,56 @@ static void extract_vcol_init(const MeshRenderData *mr,
       GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
       BLI_snprintf(attr_name, sizeof(attr_name), "c%s", attr_safe_name);
-      GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+      GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
 
       if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPCOL)) {
-        GPU_vertformat_alias_add(&format, "c");
+        GPU_vertformat_alias_add(format, "c");
       }
       if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL)) {
-        GPU_vertformat_alias_add(&format, "ac");
+        GPU_vertformat_alias_add(format, "ac");
       }
 
       /* Gather number of auto layers. */
       /* We only do `vcols` that are not overridden by `uvs`. */
       if (CustomData_get_named_layer_index(cd_ldata, CD_MLOOPUV, layer_name) == -1) {
         BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name);
-        GPU_vertformat_alias_add(&format, attr_name);
+        GPU_vertformat_alias_add(format, attr_name);
       }
     }
   }
+}
+
+/* Vertex format of the coarse vertex color buffer uploaded for the subdivision case.
+ * Four 16-bit components per element, matching #gpuMeshVcol which is used to fill that buffer. */
+static GPUVertFormat *get_coarse_vcol_format(void)
+{
+  static GPUVertFormat format = {0};
+  if (format.attr_len == 0) {
+    GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+    GPU_vertformat_alias_add(&format, "c");
+    GPU_vertformat_alias_add(&format, "ac");
+  }
+  return &format;
+}
+
+struct gpuMeshVcol {
+  ushort r, g, b, a; /* One `ushort` per color channel. */
+};
+
+static void extract_vcol_init(const MeshRenderData *mr,
+                              struct MeshBatchCache *cache,
+                              void *buf,
+                              void *UNUSED(tls_data))
+{
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+  GPUVertFormat format = {0};
+  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
+  const uint32_t vcol_layers = cache->cd_used.vcol;
+  init_vcol_format(&format, cache, cd_ldata);
 
   GPU_vertbuf_init_with_format(vbo, &format);
   GPU_vertbuf_data_alloc(vbo, mr->loop_len);
 
-  using gpuMeshVcol = struct gpuMeshVcol {
-    ushort r, g, b, a;
-  };
-
   gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo);
 
   for (int i = 0; i < MAX_MCOL; i++) {
@@ -111,10 +134,64 @@ static void extract_vcol_init(const MeshRenderData *mr,
   }
 }
 
+/* Subdivision variant of the vertex color extractor. Each used coarse layer is converted on the
+ * host into `src_data`, which is then handed to #draw_subdiv_interp_custom_data with `buffer`. */
+static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                     const MeshRenderData *UNUSED(mr),
+                                     struct MeshBatchCache *cache,
+                                     void *buffer,
+                                     void *UNUSED(data))
+{
+  GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer);
+  Mesh *coarse_mesh = subdiv_cache->mesh;
+
+  GPUVertFormat format = {0};
+  init_vcol_format(&format, cache, &coarse_mesh->ldata);
+  GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops);
+
+  GPUVertBuf *src_data = GPU_vertbuf_calloc();
+  /* Dynamic as we upload and interpolate layers one at a time. */
+  GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC);
+  GPU_vertbuf_data_alloc(src_data, coarse_mesh->totloop);
+
+  gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data);
+
+  const CustomData *cd_ldata = &coarse_mesh->ldata;
+  const uint vcol_layers = cache->cd_used.vcol;
+
+  /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in
+   * a single buffer. */
+  int pack_layer_index = 0;
+  /* Vertex color layers are bounded by MAX_MCOL, as in #init_vcol_format. */
+  for (int i = 0; i < MAX_MCOL; i++) {
+    if (vcol_layers & (1 << i)) {
+      /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */
+      const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++;
+      const MLoopCol *mloopcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i);
+
+      gpuMeshVcol *vcol = mesh_vcol;
+
+      for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, mloopcol++) {
+        vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]);
+        vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]);
+        vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]);
+        vcol->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f));
+      }
+
+      /* Ensure data is uploaded properly. */
+      GPU_vertbuf_tag_dirty(src_data);
+      draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset);
+    }
+  }
+
+  GPU_vertbuf_discard(src_data);
+}
+
 constexpr MeshExtract create_extractor_vcol()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_vcol_init;
+  extractor.init_subdiv = extract_vcol_init_subdiv;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = 0;
   extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
index bdb1410a755..bb8853b8154 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
@@ -25,6 +25,7 @@
 
 #include "BKE_deform.h"
 
+#include "draw_subdivision.h"
 #include "extract_mesh.h"
 
 namespace blender::draw {
@@ -167,10 +168,57 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr,
   }
 }
 
+/* Subdivision variant of the weights extractor: one weight is evaluated per coarse loop on the
+ * host, then the coarse buffer is handed to #draw_subdiv_interp_custom_data with the VBO. */
+static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache,
+                                        const MeshRenderData *UNUSED(mr),
+                                        struct MeshBatchCache *cache,
+                                        void *buffer,
+                                        void *UNUSED(data))
+{
+  Mesh *coarse_mesh = subdiv_cache->mesh;
+  GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+
+  /* A single float attribute, used for both the coarse and the subdivided buffers. */
+  static GPUVertFormat format = {0};
+  if (format.attr_len == 0) {
+    GPU_vertformat_attr_add(&format, "weight", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+  }
+
+  GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops);
+
+  GPUVertBuf *coarse_weights = GPU_vertbuf_calloc();
+  GPU_vertbuf_init_with_format(coarse_weights, &format);
+  GPU_vertbuf_data_alloc(coarse_weights, coarse_mesh->totloop);
+  float *weights = static_cast<float *>(GPU_vertbuf_get_data(coarse_weights));
+
+  const DRW_MeshWeightState *wstate = &cache->weight_state;
+  /* May be null, which is handled per loop below. */
+  const MDeformVert *dverts = static_cast<const MDeformVert *>(
+      CustomData_get_layer(&coarse_mesh->vdata, CD_MDEFORMVERT));
+
+  for (int poly_index = 0; poly_index < coarse_mesh->totpoly; poly_index++) {
+    const MPoly *mpoly = &coarse_mesh->mpoly[poly_index];
+    const int loop_end = mpoly->loopstart + mpoly->totloop;
+
+    for (int loop_index = mpoly->loopstart; loop_index < loop_end; loop_index++) {
+      const MLoop *ml = &coarse_mesh->mloop[loop_index];
+      /* Without a deform vertex layer, a null deform vertex is passed to the evaluation. */
+      const MDeformVert *dvert = (dverts != nullptr) ? &dverts[ml->v] : nullptr;
+      weights[loop_index] = evaluate_vertex_weight(dvert, wstate);
+    }
+  }
+
+  draw_subdiv_interp_custom_data(subdiv_cache, coarse_weights, vbo, 1, 0);
+
+  GPU_vertbuf_discard(coarse_weights);
+}
+
 constexpr MeshExtract create_extractor_weights()
 {
   MeshExtract extractor = {nullptr};
   extractor.init = extract_weights_init;
+  extractor.init_subdiv = extract_weights_init_subdiv;
   extractor.iter_poly_bm = extract_weights_iter_poly_bm;
   extractor.iter_poly_mesh = extract_weights_iter_poly_mesh;
   extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
new file mode 100644
index 00000000000..36c3970d9a0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
@@ -0,0 +1,230 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 1) readonly restrict buffer sourceBuffer
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  uint src_data[]; /* Two uints per element, see `read_vertex`. */
+#else
+  float src_data[]; /* DIMENSIONS floats per element, see `read_vertex`. */
+#endif
+};
+
+layout(std430, binding = 2) readonly restrict buffer facePTexOffset
+{
+  uint face_ptex_offset[]; /* Indexed by coarse polygon, entry `i + 1` is read as well. */
+};
+
+layout(std430, binding = 3) readonly restrict buffer patchCoords
+{
+  BlenderPatchCoord patch_coords[]; /* Indexed by subdivided loop in `main`. */
+};
+
+layout(std430, binding = 4) readonly restrict buffer extraCoarseFaceData
+{
+  uint extra_coarse_face_data[]; /* Indexed by coarse polygon, masked in `get_loop_start`. */
+};
+
+layout(std430, binding = 5) writeonly restrict buffer destBuffer
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  uint dst_data[]; /* Two uints per element, see `write_vertex`. */
+#else
+  float dst_data[]; /* DIMENSIONS floats per element, see `write_vertex`. */
+#endif
+};
+
+struct Vertex {
+  float vertex_data[DIMENSIONS]; /* One element of the interpolated data, as floats. */
+};
+
+void clear(inout Vertex v)
+{
+  for (int component = 0; component < DIMENSIONS; ++component) {
+    v.vertex_data[component] = 0.0; /* Every component is reset to zero. */
+  }
+}
+
+Vertex read_vertex(uint index)
+{
+  /* Fetch element `index` of `src_data` and expand it to DIMENSIONS floats. */
+  Vertex vertex;
+
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  if (DIMENSIONS != 4) {
+    /* This case is unsupported for now. */
+    clear(vertex);
+    return vertex;
+  }
+
+  /* Each element spans two uints, and each uint holds a pair of 16-bit values. */
+  uint first_word = src_data[index * 2];
+  uint second_word = src_data[index * 2 + 1];
+
+  /* The high half of a word comes before its low half; all values are divided by 65535. */
+  vertex.vertex_data[0] = float((first_word >> 16) & 0xffff) / 65535.0;
+  vertex.vertex_data[1] = float(first_word & 0xffff) / 65535.0;
+  vertex.vertex_data[2] = float((second_word >> 16) & 0xffff) / 65535.0;
+  vertex.vertex_data[3] = float(second_word & 0xffff) / 65535.0;
+#else
+  /* DIMENSIONS consecutive floats per element. */
+  uint first_float = index * DIMENSIONS;
+  for (int component = 0; component < DIMENSIONS; component++) {
+    vertex.vertex_data[component] = src_data[first_float + component];
+  }
+#endif
+
+  return vertex;
+}
+
+void write_vertex(uint index, Vertex v)
+{
+  /* Store `v` as element `index` of `dst_data`, starting from the `dst_offset` uniform. */
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  uint first_word_index = dst_offset + index * 2;
+  if (DIMENSIONS != 4) {
+    /* This case is unsupported for now. */
+    dst_data[first_word_index] = 0;
+    return;
+  }
+
+  /* Scale every component by 65535 and convert it to an unsigned integer. */
+  uint x = uint(v.vertex_data[0] * 65535.0);
+  uint y = uint(v.vertex_data[1] * 65535.0);
+  uint z = uint(v.vertex_data[2] * 65535.0);
+  uint w = uint(v.vertex_data[3] * 65535.0);
+  /* Two values per word, in the same order as `read_vertex` unpacks them. */
+  dst_data[first_word_index] = x << 16 | y;
+  dst_data[first_word_index + 1] = z << 16 | w;
+#else
+  /* DIMENSIONS consecutive floats per element. */
+  uint first_float_index = dst_offset + index * DIMENSIONS;
+  for (int component = 0; component < DIMENSIONS; component++) {
+    dst_data[first_float_index + component] = v.vertex_data[component];
+  }
+#endif
+}
+
+Vertex interp_vertex(Vertex v0, Vertex v1, Vertex v2, Vertex v3, vec2 uv)
+{
+  /* Per component: blend v0->v1 and v2->v3 by `uv.x`, then blend both results by `uv.y`. */
+  for (int c = 0; c < DIMENSIONS; c++) {
+    float first_pair = mix(v0.vertex_data[c], v1.vertex_data[c], uv.x);
+    float second_pair = mix(v2.vertex_data[c], v3.vertex_data[c], uv.x);
+    v0.vertex_data[c] = mix(first_pair, second_pair, uv.y);
+  }
+  return v0; /* `v0` is a by-value copy, reused here as the output. */
+}
+
+void add_with_weight(inout Vertex v0, Vertex v1, float weight)
+{
+  for (int c = 0; c < DIMENSIONS; c++) {
+    v0.vertex_data[c] += weight * v1.vertex_data[c]; /* Accumulate `v1` scaled by `weight`. */
+  }
+}
+
+Vertex average(Vertex v0, Vertex v1)
+{
+  /* Component-wise midpoint; the by-value copy of `v0` is reused as the output. */
+  for (int c = 0; c < DIMENSIONS; c++) {
+    v0.vertex_data[c] = 0.5 * (v0.vertex_data[c] + v1.vertex_data[c]);
+  }
+  return v0;
+}
+
+uint get_vertex_count(uint coarse_polygon)
+{
+  /* The patch count is the distance between the ptex offset of this polygon and the next
+   * entry of `face_ptex_offset`. */
+  uint patch_count = face_ptex_offset[coarse_polygon + 1] - face_ptex_offset[coarse_polygon];
+
+  /* If there is only one patch for the current coarse polygon, then it is a quad. Otherwise,
+   * the number of patches is the number of vertices. */
+  return (patch_count == 1) ? 4u : patch_count;
+}
+
+uint get_polygon_corner_index(uint coarse_polygon, uint patch_index)
+{
+  /* Patch index relative to the ptex offset stored for the coarse polygon. */
+  return patch_index - face_ptex_offset[coarse_polygon];
+}
+
+uint get_loop_start(uint coarse_polygon)
+{
+  return extra_coarse_face_data[coarse_polygon] & coarse_face_loopstart_mask; /* Masked. */
+}
+
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint start_loop_index = quad_index * 4;
+
+  /* Find which coarse polygon we came from. */
+  uint coarse_polygon = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count);
+  uint loop_start = get_loop_start(coarse_polygon);
+
+  /* Source values at the four corners of the patch containing this subdivided quad. */
+  Vertex v0, v1, v2, v3;
+  clear(v0);
+  clear(v1);
+  clear(v2);
+  clear(v3);
+  /* Find the number of vertices for the coarse polygon. */
+  uint number_of_vertices = get_vertex_count(coarse_polygon);
+  if (number_of_vertices == 4) {
+    /* Interpolate the src data. */
+    v0 = read_vertex(loop_start + 0);
+    v1 = read_vertex(loop_start + 1);
+    v2 = read_vertex(loop_start + 2);
+    v3 = read_vertex(loop_start + 3);
+  }
+  else {
+    /* Interpolate the src data for the center: every corner of the polygon contributes with
+     * the same weight, so `loop_end` is one past the last corner. */
+    uint loop_end = loop_start + number_of_vertices;
+    Vertex center_value;
+    clear(center_value);
+    float weight = 1.0 / float(number_of_vertices);
+    for (uint l = loop_start; l < loop_end; l++) {
+      add_with_weight(center_value, read_vertex(l), weight);
+    }
+
+    /* Interpolate between the previous and next corner for the middle values for the edges. */
+    uint patch_index = uint(patch_coords[start_loop_index].patch_index);
+    uint current_coarse_corner = get_polygon_corner_index(coarse_polygon, patch_index);
+    uint next_coarse_corner = (current_coarse_corner + 1) % number_of_vertices;
+    uint prev_coarse_corner = (current_coarse_corner + number_of_vertices - 1) %
+                              number_of_vertices;
+
+    /* Each patch starts at its own corner of the coarse polygon, not at the first corner. */
+    v0 = read_vertex(loop_start + current_coarse_corner);
+    v1 = average(v0, read_vertex(loop_start + next_coarse_corner));
+    v3 = average(v0, read_vertex(loop_start + prev_coarse_corner));
+
+    /* Interpolate between the current value, and the ones for the center and mid-edges. */
+    v2 = center_value;
+  }
+
+  /* Do a linear interpolation of the data based on the UVs for each loop of this subdivided quad.
+   */
+  for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+    BlenderPatchCoord co = patch_coords[loop_index];
+    vec2 uv = decode_uv(co.encoded_uv);
+    /* NOTE: v2 and v3 are reversed to stay consistent with the interpolation weight on the x-axis:
+     *
+     * v3 +-----+ v2
+     *    |     |
+     *    |     |
+     * v0 +-----+ v1
+     *
+     * otherwise, weight would be `1.0 - uv.x` for `v2 <-> v3`, but `uv.x` for `v0 <-> v1`.
+     */
+    Vertex result = interp_vertex(v0, v1, v3, v2, uv);
+    write_vertex(loop_index, result);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
new file mode 100644
index 00000000000..f11c0f6427e
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
@@ -0,0 +1,57 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputEdgeOrigIndex
+{
+  int input_origindex[]; /* Compared against ORIGINDEX_NONE in `emit_line`. */
+};
+
+layout(std430, binding = 1) writeonly buffer outputLinesIndices
+{
+  uint output_lines[]; /* Two indices are written per line. */
+};
+
+#ifndef LINES_LOOSE
+void emit_line(uint line_offset, uint start_loop_index, uint corner_index)
+{
+  /* The line goes from this corner to the next one. Mod 4 so we loop back at the first vertex
+   * on the last loop index (3). */
+  uint line_start = start_loop_index + corner_index;
+  uint line_end = start_loop_index + (corner_index + 1) % 4;
+
+  /* With `optimal_display`, an entry equal to ORIGINDEX_NONE gets 0xffffffff for both ends. */
+  if (optimal_display && input_origindex[line_start] == ORIGINDEX_NONE) {
+    line_start = 0xffffffff;
+    line_end = 0xffffffff;
+  }
+
+  output_lines[line_offset + 0] = line_start;
+  output_lines[line_offset + 1] = line_end;
+}
+#endif
+
+void main()
+{
+  uint invocation = get_global_invocation_index();
+  if (invocation >= total_dispatch_size) {
+    return;
+  }
+
+#ifdef LINES_LOOSE
+  /* In the loose lines case, we execute for each line, with two vertices per line. Vertex
+   * indices start at `num_subdiv_loops` and output slots start at `edge_loose_offset`. */
+  uint first_vertex = num_subdiv_loops + invocation * 2;
+  uint first_slot = edge_loose_offset + invocation * 2;
+  output_lines[first_slot] = first_vertex;
+  output_lines[first_slot + 1] = first_vertex + 1;
+#else
+  /* We execute for each quad: the first loop is at quad_index * 4 and the first line index is
+   * at quad_index * 8 (four lines with 2 vertices per line). */
+  uint first_loop = invocation * 4;
+  uint first_slot = invocation * 8;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    emit_line(first_slot + corner * 2, first_loop, corner);
+  }
+#endif
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
new file mode 100644
index 00000000000..3257ebdae17
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
@@ -0,0 +1,43 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Generate triangles from subdivision quads indices. */
+
+layout(std430, binding = 1) writeonly buffer outputTriangles
+{
+  uint output_tris[]; /* Six indices are written per subdivided quad. */
+};
+
+#ifndef SINGLE_MATERIAL
+layout(std430, binding = 2) readonly buffer inputPolygonMatOffset
+{
+  int polygon_mat_offset[]; /* Indexed by coarse polygon, added to the quad index in `main`. */
+};
+#endif
+
+void main()
+{
+  /* One invocation per subdivided quad; six triangle indices are written for each. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+#ifdef SINGLE_MATERIAL
+  uint first_index = quad_index * 6;
+#else
+  /* The offset stored for the coarse polygon shifts the slot of the quad in the output. */
+  uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+                                                                       coarse_poly_count);
+  int first_index = (int(quad_index) + polygon_mat_offset[coarse_quad_index]) * 6;
+#endif
+
+  /* Loops (0, 1, 2) and (0, 2, 3) of the quad. */
+  uint first_loop = quad_index * 4;
+  output_tris[first_index + 0] = first_loop + 0;
+  output_tris[first_index + 1] = first_loop + 1;
+  output_tris[first_index + 2] = first_loop + 2;
+  output_tris[first_index + 3] = first_loop + 0;
+  output_tris[first_index + 4] = first_loop + 2;
+  output_tris[first_index + 5] = first_loop + 3;
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
new file mode 100644
index 00000000000..005561964b8
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
@@ -0,0 +1,176 @@
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+/* Uniform block for #DRWSubdivUboStorage. */
+layout(std140) uniform shader_data
+{
+  /* Offsets in the buffers data where the source and destination data start. */
+  int src_offset;
+  int dst_offset;
+
+  /* Parameters for the DRWPatchMap. */
+  int min_patch_face;
+  int max_patch_face;
+  int max_depth;
+  int patches_are_triangular;
+
+  /* Coarse topology information. */
+  int coarse_poly_count;
+  uint edge_loose_offset;
+
+  /* Subdiv topology information. */
+  uint num_subdiv_loops;
+
+  /* Subdivision settings. */
+  bool optimal_display;
+
+  /* Sculpt data. */
+  bool has_sculpt_mask;
+
+  /* Masks for the extra coarse face data. */
+  uint coarse_face_select_mask;
+  uint coarse_face_smooth_mask;
+  uint coarse_face_active_mask;
+  uint coarse_face_loopstart_mask;
+
+  /* Total number of elements to process, shaders return early at or past this index. */
+  uint total_dispatch_size;
+};
+
+uint get_global_invocation_index()
+{
+  uint row_width = gl_NumWorkGroups.x * gl_WorkGroupSize.x; /* Invocations along X. */
+  return gl_GlobalInvocationID.y * row_width + gl_GlobalInvocationID.x;
+}
+
+/* Structure for #CompressedPatchCoord. */
+struct BlenderPatchCoord {
+  int patch_index;
+  uint encoded_uv; /* Two 16-bit values, unpacked by `decode_uv`. */
+};
+
+vec2 decode_uv(uint encoded_uv)
+{
+  /* U is taken from the high 16 bits and V from the low 16 bits, both divided by 65535. */
+  uvec2 halves = uvec2(encoded_uv >> 16, encoded_uv) & 0xFFFFu;
+  return vec2(halves) / 65535.0;
+}
+
+/* This structure is a carbon copy of OpenSubDiv's PatchTable::PatchHandle. */
+struct PatchHandle {
+  int array_index;
+  int patch_index;
+  int vertex_index;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchCoord. */
+struct PatchCoord {
+  int array_index;
+  int patch_index;
+  int vertex_index;
+  float u;
+  float v;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchMap::QuadNode.
+ * Each child is a bitfield. */
+struct QuadNode {
+  uvec4 child;
+};
+
+bool is_set(uint i)
+{
+  /* The first bit of the bitfield is QuadNode.Child.isSet. */
+  return bool(i & 0x1u);
+}
+
+bool is_leaf(uint i)
+{
+  /* The second bit of the bitfield is QuadNode.Child.isLeaf. */
+  return ((i >> 1) & 0x1u) == 1u;
+}
+
+uint get_index(uint i)
+{
+  /* QuadNode.Child.index is made of the remaining bits; an unsigned shift zero-fills the top. */
+  return i >> 2;
+}
+
+/* Duplicate of #PosNorLoop from the mesh extract CPU code.
+ * We do not use a vec3 for the position as it will be padded to a vec4 which is incompatible with
+ * the format.  */
+struct PosNorLoop {
+  float x, y, z;
+  /* TODO(kevindietrich) : figure how to compress properly as GLSL does not have char/short types,
+   * bit operations get tricky. */
+  float nx, ny, nz;
+  float flag;
+};
+
+vec3 get_vertex_pos(PosNorLoop vertex_data)
+{
+  return vec3(vertex_data.x, vertex_data.y, vertex_data.z); /* Position floats as a vec3. */
+}
+
+vec3 get_vertex_nor(PosNorLoop vertex_data)
+{
+  return vec3(vertex_data.nx, vertex_data.ny, vertex_data.nz); /* Normal floats as a vec3. */
+}
+
+void set_vertex_pos(inout PosNorLoop vertex_data, vec3 pos)
+{
+  vertex_data.x = pos.x; /* Only the position floats are written. */
+  vertex_data.y = pos.y;
+  vertex_data.z = pos.z;
+}
+
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor, uint flag)
+{
+  vertex_data.nx = nor.x;
+  vertex_data.ny = nor.y;
+  vertex_data.nz = nor.z;
+  vertex_data.flag = float(flag); /* The flag is stored as a float in #PosNorLoop. */
+}
+
+/* Set only the normal and keep the flag: manually computed normals are set in a separate pass. */
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor)
+{
+  vertex_data.nx = nor.x;
+  vertex_data.ny = nor.y;
+  vertex_data.nz = nor.z; /* `flag` is deliberately left untouched. */
+}
+
+#define ORIGINDEX_NONE -1
+
+#ifdef SUBDIV_POLYGON_OFFSET
+layout(std430, binding = 0) readonly buffer inputSubdivPolygonOffset
+{
+  uint subdiv_polygon_offset[];
+};
+
+/* Given the index of the subdivision quad, return the index of the corresponding coarse polygon.
+ * This uses subdiv_polygon_offset and since it is a growing list of offsets, we can use binary
+ * search to locate the right index. */
+uint coarse_polygon_index_from_subdiv_quad_index(uint subdiv_quad_index, uint coarse_poly_count)
+{
+  uint first = 0;
+  uint last = coarse_poly_count;
+
+  while (first != last) {
+    uint middle = (first + last) / 2;
+
+    if (subdiv_polygon_offset[middle] < subdiv_quad_index) {
+      first = middle + 1;
+    }
+    else {
+      last = middle;
+    }
+  }
+  /* `first` can reach `coarse_poly_count`: guard the read in case it is past the last offset. */
+  if (first < coarse_poly_count && subdiv_polygon_offset[first] == subdiv_quad_index) {
+    return first;
+  }
+
+  return first - 1;
+}
+#endif
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
new file mode 100644
index 00000000000..575090472b1
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
@@ -0,0 +1,56 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+  PosNorLoop pos_nor[]; /* Read at `face * 4 + corner`, only positions are used here. */
+};
+
+layout(std430, binding = 1) readonly buffer faceAdjacencyOffsets
+{
+  uint face_adjacency_offsets[]; /* Indexed by vertex, entry `v + 1` is read as well. */
+};
+
+layout(std430, binding = 2) readonly buffer faceAdjacencyLists
+{
+  uint face_adjacency_lists[]; /* Face indices, addressed through the offsets above. */
+};
+
+layout(std430, binding = 3) writeonly buffer vertexNormals
+{
+  vec3 normals[]; /* One normalized result is written per invocation. */
+};
+
+void main()
+{
+  /* We execute for each vertex. */
+  uint vertex_index = get_global_invocation_index();
+  if (vertex_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_adjacent_face_offset = face_adjacency_offsets[vertex_index];
+  uint number_of_adjacent_faces = face_adjacency_offsets[vertex_index + 1] -
+                                  first_adjacent_face_offset;
+
+  vec3 accumulated_normal = vec3(0.0);
+
+  /* For each adjacent face. */
+  for (uint i = 0; i < number_of_adjacent_faces; i++) {
+    uint adjacent_face = face_adjacency_lists[first_adjacent_face_offset + i];
+    uint start_loop_index = adjacent_face * 4;
+
+    /* Compute face normal. */
+    vec3 adjacent_verts[3];
+    for (uint j = 0; j < 3; j++) {
+      adjacent_verts[j] = get_vertex_pos(pos_nor[start_loop_index + j]);
+    }
+
+    vec3 face_normal = normalize(
+        cross(adjacent_verts[1] - adjacent_verts[0], adjacent_verts[2] - adjacent_verts[0]));
+    accumulated_normal += face_normal;
+  }
+
+  /* No per-face weight is needed: the sum of the face normals is normalized as a whole. */
+  normals[vertex_index] = normalize(accumulated_normal);
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
new file mode 100644
index 00000000000..84cd65d4161
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
@@ -0,0 +1,34 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputNormals
+{
+  vec3 vertex_normals[]; /* Indexed by the values read from `vert_loop_map`. */
+};
+
+layout(std430, binding = 1) readonly buffer inputSubdivVertLoopMap
+{
+  uint vert_loop_map[]; /* Indexed by loop, gives an index into `vertex_normals`. */
+};
+
+layout(std430, binding = 2) buffer outputPosNor
+{
+  PosNorLoop pos_nor[]; /* Indexed by loop, updated through `set_vertex_nor`. */
+};
+
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  /* For each of the four loops of the quad, look up the mapped vertex and pass its normal to
+   * `set_vertex_nor` for that loop. */
+  uint first_loop = quad_index * 4;
+  for (uint loop_index = first_loop; loop_index < first_loop + 4; loop_index++) {
+    uint subdiv_vert_index = vert_loop_map[loop_index];
+    set_vertex_nor(pos_nor[loop_index], vertex_normals[subdiv_vert_index]);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
new file mode 100644
index 00000000000..5dd7decf663
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
@@ -0,0 +1,416 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Source buffer: a flat float array, read two or three floats at a time by read_vec2() and
+ * read_vec3(). */
+layout(std430, binding = 0) buffer src_buffer
+{
+  float srcVertexBuffer[];
+};
+
+/* #DRWPatchMap */
+/* Patch handles, indexed by the leaf index found while walking `quad_nodes` in find_patch(). */
+layout(std430, binding = 1) readonly buffer inputPatchHandles
+{
+  PatchHandle input_patch_handles[];
+};
+
+/* Tree nodes walked by find_patch(); the root for a face is at `face_index - min_patch_face`. */
+layout(std430, binding = 2) readonly buffer inputQuadNodes
+{
+  QuadNode quad_nodes[];
+};
+
+/* Patch index and encoded UV to evaluate, one entry per output element. */
+layout(std430, binding = 3) readonly buffer inputPatchCoords
+{
+  BlenderPatchCoord patch_coords[];
+};
+
+/* Per-loop index; the default entry point sets the vertex flag to -1 where this is -1. */
+layout(std430, binding = 4) readonly buffer inputVertOrigIndices
+{
+  int input_vert_origindex[];
+};
+
+/* Patch buffers. */
+/* Patch array descriptors, fetched by GetPatchArray(). */
+layout(std430, binding = 5) buffer patchArray_buffer
+{
+  OsdPatchArray patchArrayBuffer[];
+};
+
+/* Control point indices of each patch; used to address `srcVertexBuffer`. */
+layout(std430, binding = 6) buffer patchIndex_buffer
+{
+  int patchIndexBuffer[];
+};
+
+/* Per-patch parameters, fetched by GetPatchParam(). */
+layout(std430, binding = 7) buffer patchParam_buffer
+{
+  OsdPatchParam patchParamBuffer[];
+};
+
+  /* Output buffer(s). */
+
+#if defined(FVAR_EVALUATION)
+/* Evaluated face-varying values, written by main() at `dst_offset + loop_index`. */
+layout(std430, binding = 8) writeonly buffer outputFVarData
+{
+  vec2 output_fvar[];
+};
+#elif defined(FDOTS_EVALUATION)
+/* For face dots, we build the position, normals, and index buffers in one go. */
+
+/* vec3 is padded to vec4, but the format used for fdots does not have any padding. */
+struct FDotVert {
+  float x, y, z;
+};
+
+/* Same here, do not use vec3. */
+struct FDotNor {
+  float x, y, z;
+  /* Filled from get_face_flag(): -1.0 for an active face, 1.0 for a selected face, else 0.0. */
+  float flag;
+};
+
+/* Face dot positions, one per coarse quad. */
+layout(std430, binding = 8) writeonly buffer outputVertices
+{
+  FDotVert output_verts[];
+};
+
+/* Face dot normals and flags, one per coarse quad. */
+layout(std430, binding = 9) writeonly buffer outputNormals
+{
+  FDotNor output_nors[];
+};
+
+/* Face dot indices; main() writes each coarse quad's own index at its slot. */
+layout(std430, binding = 10) writeonly buffer outputFdotsIndices
+{
+  uint output_indices[];
+};
+
+/* Per coarse face bits, tested against `coarse_face_select_mask` and
+ * `coarse_face_active_mask`. */
+layout(std430, binding = 11) readonly buffer extraCoarseFaceData
+{
+  uint extra_coarse_face_data[];
+};
+#else
+/* Evaluated position, normal and flag for every loop. */
+layout(std430, binding = 8) writeonly buffer outputVertexData
+{
+  PosNorLoop output_verts[];
+};
+#endif
+
+/* Fetch element `index` of the source buffer, viewed as tightly packed pairs of floats. */
+vec2 read_vec2(int index)
+{
+  int base = index * 2;
+  return vec2(srcVertexBuffer[base], srcVertexBuffer[base + 1]);
+}
+
+/* Fetch element `index` of the source buffer, viewed as tightly packed triplets of floats. */
+vec3 read_vec3(int index)
+{
+  int base = index * 3;
+  return vec3(srcVertexBuffer[base], srcVertexBuffer[base + 1], srcVertexBuffer[base + 2]);
+}
+
+/* Accessor for the patch array descriptor stored at `arrayIndex`. */
+OsdPatchArray GetPatchArray(int arrayIndex)
+{
+  OsdPatchArray patch_array = patchArrayBuffer[arrayIndex];
+  return patch_array;
+}
+
+/* Accessor for the patch parameters stored at `patchIndex`. */
+OsdPatchParam GetPatchParam(int patchIndex)
+{
+  OsdPatchParam patch_param = patchParamBuffer[patchIndex];
+  return patch_param;
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch Coordinate lookup. Return an OsdPatchCoord for the given patch_index and uvs.
+ * This code is a port of the OpenSubdiv PatchMap lookup code.
+ */
+
+/* Build a handle with every index set to -1; GetPatchCoord() recognizes it through
+ * `array_index == -1`. */
+PatchHandle bogus_patch_handle()
+{
+  PatchHandle invalid_handle;
+  invalid_handle.patch_index = -1;
+  invalid_handle.vertex_index = -1;
+  invalid_handle.array_index = -1;
+  return invalid_handle;
+}
+
+/* Select the quadrant containing (u, v) and make the coordinates relative to it.
+ * Bit 0 of the result is set when `u >= median`, bit 1 when `v >= median`. */
+int transformUVToQuadQuadrant(float median, inout float u, inout float v)
+{
+  int quadrant = 0;
+
+  if (u >= median) {
+    u -= median;
+    quadrant |= 1;
+  }
+
+  if (v >= median) {
+    v -= median;
+    quadrant |= 2;
+  }
+
+  return quadrant;
+}
+
+/* Triangular counterpart of transformUVToQuadQuadrant(): returns the child index (0..3)
+ * containing (u, v) and updates the coordinates for the next level. `rotated` is toggled
+ * whenever child 3 is selected. */
+int transformUVToTriQuadrant(float median, inout float u, inout float v, inout bool rotated)
+{
+  if (rotated) {
+    if (u < median) {
+      v -= median;
+      return 1;
+    }
+    if (v < median) {
+      u -= median;
+      return 2;
+    }
+    /* Both coordinates are shifted before the diagonal test in the rotated case. */
+    u -= median;
+    v -= median;
+    if ((u + v) < median) {
+      rotated = false;
+      return 3;
+    }
+    return 0;
+  }
+
+  if (u >= median) {
+    u -= median;
+    return 1;
+  }
+  if (v >= median) {
+    v -= median;
+    return 2;
+  }
+  if ((u + v) >= median) {
+    rotated = true;
+    return 3;
+  }
+  return 0;
+}
+
+/* Look up the patch containing the point (u, v) of face `face_index`.
+ *
+ * Starting from the root node of the face, the tree stored in `quad_nodes` is descended one
+ * level per iteration until a leaf is reached, whose handle is returned. A bogus handle (see
+ * bogus_patch_handle()) is returned when the face is outside of
+ * [min_patch_face, max_patch_face], when its root node is not set, or when no leaf is found
+ * within `max_depth` levels. */
+PatchHandle find_patch(int face_index, float u, float v)
+{
+  if (face_index < min_patch_face || face_index > max_patch_face) {
+    return bogus_patch_handle();
+  }
+
+  QuadNode node = quad_nodes[face_index - min_patch_face];
+
+  if (!is_set(node.child[0])) {
+    return bogus_patch_handle();
+  }
+
+  float median = 0.5;
+  bool tri_rotated = false;
+
+  /* The quadrant size is halved at every level of the tree. */
+  for (int depth = 0; depth <= max_depth; ++depth, median *= 0.5) {
+    int quadrant = (patches_are_triangular != 0) ?
+                       transformUVToTriQuadrant(median, u, v, tri_rotated) :
+                       transformUVToQuadQuadrant(median, u, v);
+
+    if (is_leaf(node.child[quadrant])) {
+      return input_patch_handles[get_index(node.child[quadrant])];
+    }
+
+    node = quad_nodes[get_index(node.child[quadrant])];
+  }
+
+  /* No leaf was found: without an explicit return the result would be undefined. */
+  return bogus_patch_handle();
+}
+
+/* Fallback coordinate used when no patch handle was found: array and vertex indices are
+ * zero, and the face index is stored as the patch index. */
+OsdPatchCoord bogus_patch_coord(int face_index, float u, float v)
+{
+  OsdPatchCoord fallback;
+  fallback.s = u;
+  fallback.t = v;
+  fallback.arrayIndex = 0;
+  fallback.vertIndex = 0;
+  fallback.patchIndex = face_index;
+  return fallback;
+}
+
+/* Build the patch coordinate for the point (u, v) of face `face_index`, using the handle
+ * returned by find_patch(). Falls back to bogus_patch_coord() for a bogus handle. */
+OsdPatchCoord GetPatchCoord(int face_index, float u, float v)
+{
+  PatchHandle handle = find_patch(face_index, u, v);
+
+  bool handle_is_bogus = (handle.array_index == -1);
+  if (handle_is_bogus) {
+    return bogus_patch_coord(face_index, u, v);
+  }
+
+  OsdPatchCoord result;
+  result.s = u;
+  result.t = v;
+  result.arrayIndex = handle.array_index;
+  result.patchIndex = handle.patch_index;
+  result.vertIndex = handle.vertex_index;
+  return result;
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch evaluation. Note that the 1st and 2nd derivatives are always computed, although we
+ * only return and use the 1st derivatives if adaptive patches are used. This could
+ * perhaps be optimized.
+ */
+
+#if defined(FVAR_EVALUATION)
+/* Accumulate into `dst` the face-varying value of patch `patch_index` at (u, v), as the
+ * weighted sum of the patch control values read from the source buffer. */
+void evaluate_patches_limits(int patch_index, float u, float v, inout vec2 dst)
+{
+  OsdPatchCoord patch_coord = GetPatchCoord(patch_index, u, v);
+  OsdPatchArray patch_array = GetPatchArray(patch_coord.arrayIndex);
+  OsdPatchParam patch_param = GetPatchParam(patch_coord.patchIndex);
+
+  int basis_type = OsdPatchParamIsRegular(patch_param) ? patch_array.regDesc : patch_array.desc;
+
+  /* Only the point weights are used here, the derivative weights are outputs we ignore. */
+  float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+  int point_count = OsdEvaluatePatchBasis(
+      basis_type, patch_param, patch_coord.s, patch_coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+  int local_patch = patch_coord.patchIndex - patch_array.primitiveIdBase;
+  int first_index = patch_array.indexBase + patch_array.stride * local_patch;
+
+  for (int cv = 0; cv < point_count; cv++) {
+    int control_index = patchIndexBuffer[first_index + cv];
+    dst += read_vec2(src_offset + control_index) * wP[cv];
+  }
+}
+#else
+/* Accumulate into `dst`, `du` and `dv` the position and first derivatives of patch
+ * `patch_index` at (u, v), as weighted sums of the patch control points. */
+void evaluate_patches_limits(
+    int patch_index, float u, float v, inout vec3 dst, inout vec3 du, inout vec3 dv)
+{
+  OsdPatchCoord patch_coord = GetPatchCoord(patch_index, u, v);
+  OsdPatchArray patch_array = GetPatchArray(patch_coord.arrayIndex);
+  OsdPatchParam patch_param = GetPatchParam(patch_coord.patchIndex);
+
+  int basis_type = OsdPatchParamIsRegular(patch_param) ? patch_array.regDesc : patch_array.desc;
+
+  /* The second derivative weights are computed by the basis evaluation but not used. */
+  float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+  int point_count = OsdEvaluatePatchBasis(
+      basis_type, patch_param, patch_coord.s, patch_coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+  int local_patch = patch_coord.patchIndex - patch_array.primitiveIdBase;
+  int first_index = patch_array.indexBase + patch_array.stride * local_patch;
+
+  for (int cv = 0; cv < point_count; cv++) {
+    vec3 control_point = read_vec3(patchIndexBuffer[first_index + cv]);
+
+    dst += control_point * wP[cv];
+    du += control_point * wDu[cv];
+    dv += control_point * wDv[cv];
+  }
+}
+#endif
+
+/* ------------------------------------------------------------------------------
+ * Entry point.
+ */
+
+#if defined(FVAR_EVALUATION)
+void main()
+{
+  /* One invocation evaluates the face-varying data of the four loops of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+  uint end_loop = first_loop + 4;
+
+  for (uint loop_index = first_loop; loop_index < end_loop; loop_index++) {
+    BlenderPatchCoord patch_co = patch_coords[loop_index];
+    vec2 uv = decode_uv(patch_co.encoded_uv);
+
+    /* The evaluation accumulates into its output, so start from zero. */
+    vec2 evaluated = vec2(0.0);
+    evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, evaluated);
+
+    output_fvar[dst_offset + loop_index] = evaluated;
+  }
+}
+#elif defined(FDOTS_EVALUATION)
+/* True when the selection bit is set in the extra data of the given coarse face. */
+bool is_face_selected(uint coarse_quad_index)
+{
+  uint select_bits = extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask;
+  return select_bits != 0;
+}
+
+/* True when the active bit is set in the extra data of the given coarse face. */
+bool is_face_active(uint coarse_quad_index)
+{
+  uint active_bits = extra_coarse_face_data[coarse_quad_index] & coarse_face_active_mask;
+  return active_bits != 0;
+}
+
+/* Face dot flag: -1.0 for an active face, 1.0 for a selected face, 0.0 otherwise.
+ * The active state takes precedence over the selected state. */
+float get_face_flag(uint coarse_quad_index)
+{
+  return is_face_active(coarse_quad_index) ?
+             -1.0 :
+             (is_face_selected(coarse_quad_index) ? 1.0 : 0.0);
+}
+
+void main()
+{
+  /* One invocation produces the face dot of one coarse quad. */
+  uint coarse_quad_index = get_global_invocation_index();
+  if (coarse_quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  BlenderPatchCoord patch_co = patch_coords[coarse_quad_index];
+  vec2 uv = decode_uv(patch_co.encoded_uv);
+
+  /* The evaluation accumulates into its outputs, so start from zero. */
+  vec3 position = vec3(0.0);
+  vec3 du = vec3(0.0);
+  vec3 dv = vec3(0.0);
+  evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, position, du, dv);
+
+  /* The normal is the normalized cross product of the two first derivatives. */
+  vec3 normal = normalize(cross(du, dv));
+
+  output_verts[coarse_quad_index] = FDotVert(position.x, position.y, position.z);
+  output_nors[coarse_quad_index] = FDotNor(
+      normal.x, normal.y, normal.z, get_face_flag(coarse_quad_index));
+  output_indices[coarse_quad_index] = coarse_quad_index;
+}
+#else
+void main()
+{
+  /* One invocation evaluates the four loops of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+  uint end_loop = first_loop + 4;
+
+  for (uint loop_index = first_loop; loop_index < end_loop; loop_index++) {
+    BlenderPatchCoord patch_co = patch_coords[loop_index];
+    vec2 uv = decode_uv(patch_co.encoded_uv);
+
+    /* The evaluation accumulates into its outputs, so start from zero. */
+    vec3 position = vec3(0.0);
+    vec3 du = vec3(0.0);
+    vec3 dv = vec3(0.0);
+    evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, position, du, dv);
+
+#  if defined(LIMIT_NORMALS)
+    vec3 normal = normalize(cross(du, dv));
+#  else
+    /* Placeholder, the normal is computed later. */
+    vec3 normal = vec3(0.0);
+#  endif
+
+    /* Loops whose original index is -1 get a flag of -1. */
+    uint flag = 0;
+    if (input_vert_origindex[loop_index] == -1) {
+      flag = -1;
+    }
+
+    PosNorLoop vertex_data;
+    set_vertex_pos(vertex_data, position);
+    set_vertex_nor(vertex_data, normal, flag);
+    output_verts[loop_index] = vertex_data;
+  }
+}
+#endif
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
new file mode 100644
index 00000000000..6c76cd41ca4
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
@@ -0,0 +1,97 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Per-loop positions and normals, read through get_vertex_pos() and get_vertex_nor(). */
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+  PosNorLoop pos_nor[];
+};
+
+/* Per-loop edge index; compute_line_factor() compares it against -1. */
+layout(std430, binding = 1) readonly buffer inputEdgeIndex
+{
+  uint input_edge_index[];
+};
+
+/* Edge factors: one float per loop when GPU_AMD_DRIVER_BYTE_BUG is defined, otherwise the four
+ * factors of a quad packed as bytes into a single uint (see write_vec4()). */
+layout(std430, binding = 2) writeonly buffer outputEdgeFactors
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+  float output_edge_fac[];
+#else
+  uint output_edge_fac[];
+#endif
+};
+
+/* Store the four edge factors of one quad into the output buffer.
+ *
+ * With GPU_AMD_DRIVER_BYTE_BUG, `index` is the first loop of the quad and each factor is
+ * written as a float. Otherwise `index` is the quad index and the factors are scaled to bytes
+ * and packed into a single uint. */
+void write_vec4(uint index, vec4 edge_facs)
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+  for (uint i = 0; i < 4; i++) {
+    output_edge_fac[index + i] = edge_facs[i];
+  }
+#else
+  /* Use same scaling as in extract_edge_fac_iter_poly_mesh. */
+  uint a = uint(clamp(edge_facs.x * 253.0 + 1.0, 0.0, 255.0));
+  uint b = uint(clamp(edge_facs.y * 253.0 + 1.0, 0.0, 255.0));
+  uint c = uint(clamp(edge_facs.z * 253.0 + 1.0, 0.0, 255.0));
+  uint d = uint(clamp(edge_facs.w * 253.0 + 1.0, 0.0, 255.0));
+  /* The first corner goes into the least significant byte so that the four factors appear in
+   * loop order in memory.
+   * NOTE(review): assumes the buffer is consumed as one byte per loop, lowest byte first. */
+  uint packed_edge_fac = (d << 24) | (c << 16) | (b << 8) | a;
+  output_edge_fac[index] = packed_edge_fac;
+#endif
+}
+
+/* From extract_mesh_vbo_edge_fac.cc, keep in sync! */
+float loop_edge_factor_get(vec3 f_no, vec3 v_co, vec3 v_no, vec3 v_next_co)
+{
+  /* Normal of the plane spanned by the vertex normal and the edge direction. */
+  vec3 edge_vector = v_next_co - v_co;
+  vec3 edge_normal = normalize(cross(v_no, edge_vector));
+
+  /* Re-scale to the slider range, then keep the factor within [0, 1]. */
+  float factor = abs(dot(edge_normal, f_no));
+  factor *= (1.0 / 0.065);
+  return clamp(factor, 0.0, 1.0);
+}
+
+/* Edge factor for the edge starting at corner `corner_index` of the quad whose first loop is
+ * `start_loop_index`. Returns 0.0 for loops without an edge (index -1) when optimal display
+ * is enabled. */
+float compute_line_factor(uint start_loop_index, uint corner_index, vec3 face_normal)
+{
+  uint cur_loop = start_loop_index + corner_index;
+
+  if (input_edge_index[cur_loop] == -1 && optimal_display) {
+    return 0.0;
+  }
+
+  /* Only the corner index wraps around, so the last corner is followed by the first corner of
+   * the same quad. */
+  uint next_corner = (corner_index + 1) % 4;
+  uint next_loop = start_loop_index + next_corner;
+
+  vec3 cur_pos = get_vertex_pos(pos_nor[cur_loop]);
+  vec3 cur_nor = get_vertex_nor(pos_nor[cur_loop]);
+  vec3 next_pos = get_vertex_pos(pos_nor[next_loop]);
+
+  return loop_edge_factor_get(face_normal, cur_pos, cur_nor, next_pos);
+}
+
+void main()
+{
+  /* One invocation computes the four edge factors of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  /* Loops are stored four per quad. */
+  uint first_loop = quad_index * 4;
+
+  /* The face normal is needed to compute the dihedral edge angle; it is derived from the
+   * first three corners of the quad. */
+  vec3 corner0 = get_vertex_pos(pos_nor[first_loop + 0]);
+  vec3 corner1 = get_vertex_pos(pos_nor[first_loop + 1]);
+  vec3 corner2 = get_vertex_pos(pos_nor[first_loop + 2]);
+  vec3 face_normal = normalize(cross(corner1 - corner0, corner2 - corner0));
+
+  vec4 factors = vec4(0.0);
+  for (uint corner = 0u; corner < 4u; corner++) {
+    factors[corner] = compute_line_factor(first_loop, corner, face_normal);
+  }
+
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+  /* One float per loop. */
+  write_vec4(first_loop, factors);
+#else
+  /* When packed into bytes, the index is the same as for the quad. */
+  write_vec4(quad_index, factors);
+#endif
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
new file mode 100644
index 00000000000..ea73b9482d3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
@@ -0,0 +1,80 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Per-loop positions, read through get_vertex_pos(). */
+layout(std430, binding = 0) readonly buffer inputVerts
+{
+  PosNorLoop pos_nor[];
+};
+
+/* UV coordinates; the entry for a loop is read at `src_offset + loop_index`. */
+layout(std430, binding = 1) readonly buffer inputUVs
+{
+  vec2 uvs[];
+};
+
+/* Mirror of #UVStretchAngle in the C++ code, but using floats until proper data compression
+ * is implemented for all subdivision data. */
+struct UVStretchAngle {
+  /* Angle between the two 3D edges at the corner, multiplied by 1/pi. */
+  float angle;
+  /* Directions of the two UV edges at the corner, as atan(y, x) multiplied by 1/pi. */
+  float uv_angle0;
+  float uv_angle1;
+};
+
+/* One entry per loop, written by main(). */
+layout(std430, binding = 2) writeonly buffer outputStretchAngles
+{
+  UVStretchAngle uv_stretches[];
+};
+
+/* Pi and its reciprocal. */
+#define M_PI 3.1415926535897932
+#define M_1_PI 0.31830988618379067154
+
+/* Adapted from BLI_math_vector.h */
+float angle_normalized_v3v3(vec3 v1, vec3 v2)
+{
+  /* Equivalent to acos(dot(v1, v2)) for unit vectors, but more accurate: the angle is derived
+   * from the length of the difference (or of the sum for obtuse angles). */
+  if (dot(v1, v2) >= 0.0) {
+    return 2.0 * asin(length(v1 - v2) / 2.0);
+  }
+
+  return M_PI - 2.0 * asin(length(v1 + v2) / 2.0);
+}
+
+void main()
+{
+  /* One invocation computes the stretch angles of the four corners of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    /* Neighbors wrap around within the quad. */
+    uint cur_loop = first_loop + corner;
+    uint next_loop = first_loop + (corner + 1) % 4;
+    uint prev_loop = first_loop + (corner + 3) % 4;
+
+    /* Normalized 2D edge directions, from the UVs. */
+    vec2 uv_cur = uvs[src_offset + cur_loop];
+    vec2 uv_next = uvs[src_offset + next_loop];
+    vec2 uv_prev = uvs[src_offset + prev_loop];
+
+    vec2 uv_dir_in = normalize(uv_prev - uv_cur);
+    vec2 uv_dir_out = normalize(uv_cur - uv_next);
+
+    /* Normalized 3D edge directions, from the positions. */
+    vec3 pos_cur = get_vertex_pos(pos_nor[cur_loop]);
+    vec3 pos_next = get_vertex_pos(pos_nor[next_loop]);
+    vec3 pos_prev = get_vertex_pos(pos_nor[prev_loop]);
+
+    vec3 pos_dir_in = normalize(pos_prev - pos_cur);
+    vec3 pos_dir_out = normalize(pos_cur - pos_next);
+
+    /* This logic is adapted from #edituv_get_edituv_stretch_angle. Keep in sync! */
+    UVStretchAngle result;
+    result.uv_angle0 = atan(uv_dir_in.y, uv_dir_in.x) * M_1_PI;
+    result.uv_angle1 = atan(uv_dir_out.y, uv_dir_out.x) * M_1_PI;
+    result.angle = angle_normalized_v3v3(pos_dir_in, pos_dir_out) * M_1_PI;
+
+    uv_stretches[cur_loop] = result;
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
new file mode 100644
index 00000000000..e897fb3f3c0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
@@ -0,0 +1,31 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Stretch area values, indexed by coarse polygon. */
+layout(std430, binding = 1) readonly buffer inputCoarseData
+{
+  float coarse_stretch_area[];
+};
+
+/* Stretch area values, one per loop; main() writes the coarse value to the four loops of each
+ * quad. */
+layout(std430, binding = 2) writeonly buffer outputSubdivData
+{
+  float subdiv_stretch_area[];
+};
+
+void main()
+{
+  /* One invocation fills the four loops of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  /* Every loop of the quad receives the value of the coarse polygon the quad comes from. */
+  uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+                                                                       coarse_poly_count);
+  float coarse_area = coarse_stretch_area[coarse_quad_index];
+
+  /* Loops are stored four per quad. */
+  uint first_loop = quad_index * 4;
+  for (uint corner = 0u; corner < 4u; corner++) {
+    subdiv_stretch_area[first_loop + corner] = coarse_area;
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
new file mode 100644
index 00000000000..41a8df3cf82
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
@@ -0,0 +1,52 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Per-loop positions and vertex normals, read through get_vertex_pos() and get_vertex_nor(). */
+layout(std430, binding = 1) readonly buffer inputVertexData
+{
+  PosNorLoop pos_nor[];
+};
+
+/* Per coarse face bits; main() tests them against `coarse_face_smooth_mask`. */
+layout(std430, binding = 2) readonly buffer extraCoarseFaceData
+{
+  uint extra_coarse_face_data[];
+};
+
+/* Loop normals, one per loop: the vertex normal for smooth faces, the face normal otherwise. */
+layout(std430, binding = 3) writeonly buffer outputLoopNormals
+{
+  vec3 output_lnor[];
+};
+
+void main()
+{
+  /* One invocation writes the loop normals of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  /* Loops are stored four per quad. */
+  uint first_loop = quad_index * 4;
+
+  uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+                                                                       coarse_poly_count);
+  bool is_smooth = (extra_coarse_face_data[coarse_quad_index] & coarse_face_smooth_mask) != 0;
+
+  if (is_smooth) {
+    /* Smooth shading: every loop takes the normal of its vertex. */
+    for (uint corner = 0u; corner < 4u; corner++) {
+      uint loop_index = first_loop + corner;
+      output_lnor[loop_index] = get_vertex_nor(pos_nor[loop_index]);
+    }
+    return;
+  }
+
+  /* Flat shading: all loops share the normal of the triangle made of the first three
+   * corners of the quad. */
+  vec3 corner0 = get_vertex_pos(pos_nor[first_loop + 0]);
+  vec3 corner1 = get_vertex_pos(pos_nor[first_loop + 1]);
+  vec3 corner2 = get_vertex_pos(pos_nor[first_loop + 2]);
+  vec3 flat_normal = normalize(cross(corner1 - corner0, corner2 - corner0));
+
+  for (uint corner = 0u; corner < 4u; corner++) {
+    output_lnor[first_loop + corner] = flat_normal;
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
new file mode 100644
index 00000000000..7182ce57ad3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
@@ -0,0 +1,47 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Output element: face set color and mask value of one loop. */
+struct SculptData {
+  uint face_set_color;
+  float mask;
+};
+
+/* Per-loop mask values; only read when `has_sculpt_mask` is true. */
+layout(std430, binding = 0) readonly restrict buffer sculptMask
+{
+  float sculpt_mask[];
+};
+
+/* Per-loop face set colors. */
+layout(std430, binding = 1) readonly restrict buffer faceSetColor
+{
+  uint face_set_color[];
+};
+
+/* Combined output, one entry per loop. */
+layout(std430, binding = 2) writeonly restrict buffer sculptData
+{
+  SculptData sculpt_data[];
+};
+
+void main()
+{
+  /* One invocation combines the sculpt data of the four loops of one quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+  uint end_loop = first_loop + 4;
+
+  for (uint loop_index = first_loop; loop_index < end_loop; loop_index++) {
+    SculptData combined;
+    combined.face_set_color = face_set_color[loop_index];
+    /* Without a sculpt mask, the mask buffer is not read and the value defaults to zero. */
+    combined.mask = has_sculpt_mask ? sculpt_mask[loop_index] : 0.0;
+
+    sculpt_data[loop_index] = combined;
+  }
+}
author	Kévin Dietrich <kevin.dietrich@mailoo.org>	2021-12-27 18:34:47 +0300
committer	Kévin Dietrich <kevin.dietrich@mailoo.org>	2021-12-27 18:35:54 +0300
commit	eed45d2a239a2a18a2420ba15dfb55e0f8dc5630 (patch)
tree	aa55ce966caa8e28db4853d7d755003ed249805b /source/blender/draw
parent	31e120ef4997583332aa9b5af93521e7e666e9f3 (diff)