diff options
Diffstat (limited to 'source/blender')
84 files changed, 5799 insertions, 200 deletions
diff --git a/source/blender/blenkernel/BKE_mesh_wrapper.h b/source/blender/blenkernel/BKE_mesh_wrapper.h index 2fe264fd0f7..12e8fd71503 100644 --- a/source/blender/blenkernel/BKE_mesh_wrapper.h +++ b/source/blender/blenkernel/BKE_mesh_wrapper.h @@ -22,6 +22,7 @@ struct BMEditMesh; struct CustomData_MeshMasks; struct Mesh; +struct Object; #ifdef __cplusplus extern "C" { @@ -51,6 +52,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const struct Mesh *me, int vert_coords_len, const float mat[4][4]); +struct Mesh *BKE_mesh_wrapper_ensure_subdivision(const struct Object *ob, struct Mesh *me); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenkernel/BKE_object.h b/source/blender/blenkernel/BKE_object.h index 03565bd3bda..a7d39598e54 100644 --- a/source/blender/blenkernel/BKE_object.h +++ b/source/blender/blenkernel/BKE_object.h @@ -48,6 +48,7 @@ struct RegionView3D; struct RigidBodyWorld; struct Scene; struct ShaderFxData; +struct SubsurfModifierData; struct View3D; struct ViewLayer; @@ -512,6 +513,7 @@ bool BKE_object_obdata_texspace_get(struct Object *ob, float **r_loc, float **r_size); +struct Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const struct Object *object); /** Get evaluated mesh for given object. */ struct Mesh *BKE_object_get_evaluated_mesh(const struct Object *object); /** @@ -712,6 +714,15 @@ void BKE_object_modifiers_lib_link_common(void *userData, struct ID **idpoin, int cb_flag); +/** + * Return the last subsurf modifier of an object, this does not check whether modifiers on top of + * it are disabled. Return NULL if no such modifier is found. + * + * This does not check if the modifier is enabled as it is assumed that the caller verified that it + * is enabled for its evaluation mode. 
+ */ +struct SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const struct Object *ob); + void BKE_object_replace_data_on_shallow_copy(struct Object *ob, struct ID *new_data); struct PartEff; diff --git a/source/blender/blenkernel/BKE_subdiv.h b/source/blender/blenkernel/BKE_subdiv.h index 2fb27fad30d..169a4337f6a 100644 --- a/source/blender/blenkernel/BKE_subdiv.h +++ b/source/blender/blenkernel/BKE_subdiv.h @@ -188,7 +188,16 @@ typedef struct Subdiv { /* Cached values, are not supposed to be accessed directly. */ struct { /* Indexed by base face index, element indicates total number of ptex - * faces created for preceding base faces. */ + * faces created for preceding base faces. This also stores the final + * ptex offset (the total number of PTex faces) at the end of the array + * so that algorithms can compute the number of ptex faces for a given + * face by computing the delta with the offset for the next face without + * using a separate data structure, e.g.: + * + * const int num_face_ptex_faces = face_ptex_offset[i + 1] - face_ptex_offset[i]; + * + * In total this array has a size of `num base faces + 1`. + */ int *face_ptex_offset; } cache_; } Subdiv; @@ -257,6 +266,9 @@ void BKE_subdiv_displacement_detach(Subdiv *subdiv); /* ============================ TOPOLOGY HELPERS ============================ */ +/* For each element in the array, this stores the total number of ptex faces up to that element, + * with the total number of ptex faces being the last element in the array. The array is of length + * `base face count + 1`. 
*/ int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv); /* =========================== PTEX FACES AND GRIDS ========================= */ diff --git a/source/blender/blenkernel/BKE_subdiv_eval.h b/source/blender/blenkernel/BKE_subdiv_eval.h index 0b61e62c89c..177d5f386a8 100644 --- a/source/blender/blenkernel/BKE_subdiv_eval.h +++ b/source/blender/blenkernel/BKE_subdiv_eval.h @@ -31,15 +31,25 @@ extern "C" { struct Mesh; struct Subdiv; +struct OpenSubdiv_EvaluatorCache; + +typedef enum eSubdivEvaluatorType { + SUBDIV_EVALUATOR_TYPE_CPU, + SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, +} eSubdivEvaluatorType; /* Returns true if evaluator is ready for use. */ -bool BKE_subdiv_eval_begin(struct Subdiv *subdiv); +bool BKE_subdiv_eval_begin(struct Subdiv *subdiv, + eSubdivEvaluatorType evaluator_type, + struct OpenSubdiv_EvaluatorCache *evaluator_cache); /* coarse_vertex_cos is an optional argument which allows to override coordinates of the coarse * mesh. */ bool BKE_subdiv_eval_begin_from_mesh(struct Subdiv *subdiv, const struct Mesh *mesh, - const float (*coarse_vertex_cos)[3]); + const float (*coarse_vertex_cos)[3], + eSubdivEvaluatorType evaluator_type, + struct OpenSubdiv_EvaluatorCache *evaluator_cache); bool BKE_subdiv_eval_refine_from_mesh(struct Subdiv *subdiv, const struct Mesh *mesh, const float (*coarse_vertex_cos)[3]); diff --git a/source/blender/blenkernel/BKE_subdiv_foreach.h b/source/blender/blenkernel/BKE_subdiv_foreach.h index 3f74299455d..f63e23917ef 100644 --- a/source/blender/blenkernel/BKE_subdiv_foreach.h +++ b/source/blender/blenkernel/BKE_subdiv_foreach.h @@ -38,7 +38,8 @@ typedef bool (*SubdivForeachTopologyInformationCb)(const struct SubdivForeachCon const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons); + const int num_polygons, + const int *subdiv_polygon_offset); typedef void (*SubdivForeachVertexFromCornerCb)(const struct SubdivForeachContext *context, void *tls, diff --git 
a/source/blender/blenkernel/BKE_subdiv_modifier.h b/source/blender/blenkernel/BKE_subdiv_modifier.h new file mode 100644 index 00000000000..94068613101 --- /dev/null +++ b/source/blender/blenkernel/BKE_subdiv_modifier.h @@ -0,0 +1,71 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2021 by Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup bke + */ + +#pragma once + +#include "BLI_sys_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct Mesh; +struct Object; +struct Scene; +struct Subdiv; +struct SubdivSettings; +struct SubsurfModifierData; + +void BKE_subsurf_modifier_subdiv_settings_init(struct SubdivSettings *settings, + const struct SubsurfModifierData *smd, + const bool use_render_params); + +/* If skip_check_is_last is true, we assume that the modifier passed is the last enabled modifier + * in the stack. 
*/ +bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const struct Scene *scene, + const struct Object *ob, + const struct SubsurfModifierData *smd, + int required_mode, + bool skip_check_is_last); + +bool BKE_subsurf_modifier_can_do_gpu_subdiv(const struct Scene *scene, + const struct Object *ob, + const int required_mode); + +extern void (*BKE_subsurf_modifier_free_gpu_cache_cb)(struct Subdiv *subdiv); + +struct Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure( + const struct SubsurfModifierData *smd, + const struct SubdivSettings *subdiv_settings, + const struct Mesh *mesh, + const bool for_draw_code); + +struct SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(struct SubsurfModifierData *smd); + +/* Return the #ModifierMode required for the evaluation of the subsurf modifier, which should be + * used to check if the modifier is enabled. */ +int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt index fe33abd17c0..3c780a933d3 100644 --- a/source/blender/blenkernel/CMakeLists.txt +++ b/source/blender/blenkernel/CMakeLists.txt @@ -275,6 +275,7 @@ set(SRC intern/subdiv_eval.c intern/subdiv_foreach.c intern/subdiv_mesh.c + intern/subdiv_modifier.c intern/subdiv_stats.c intern/subdiv_topology.c intern/subsurf_ccg.c @@ -453,6 +454,7 @@ set(SRC BKE_subdiv_eval.h BKE_subdiv_foreach.h BKE_subdiv_mesh.h + BKE_subdiv_modifier.h BKE_subdiv_topology.h BKE_subsurf.h BKE_text.h diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc index da5b4ccc764..47ea55be871 100644 --- a/source/blender/blenkernel/intern/mesh_normals.cc +++ b/source/blender/blenkernel/intern/mesh_normals.cc @@ -319,6 +319,7 @@ void BKE_mesh_ensure_normals(Mesh *mesh) void BKE_mesh_ensure_normals_for_display(Mesh *mesh) { switch ((eMeshWrapperType)mesh->runtime.wrapper_type) { + case 
ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: /* Run code below. */ break; diff --git a/source/blender/blenkernel/intern/mesh_wrapper.c b/source/blender/blenkernel/intern/mesh_wrapper.c index bc1ffeb8cf4..5956f2802b5 100644 --- a/source/blender/blenkernel/intern/mesh_wrapper.c +++ b/source/blender/blenkernel/intern/mesh_wrapper.c @@ -36,6 +36,7 @@ #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" +#include "DNA_modifier_types.h" #include "DNA_object_types.h" #include "BLI_ghash.h" @@ -50,8 +51,14 @@ #include "BKE_mesh.h" #include "BKE_mesh_runtime.h" #include "BKE_mesh_wrapper.h" +#include "BKE_modifier.h" +#include "BKE_object.h" +#include "BKE_subdiv.h" +#include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" #include "DEG_depsgraph.h" +#include "DEG_depsgraph_query.h" Mesh *BKE_mesh_wrapper_from_editmesh_with_coords(BMEditMesh *em, const CustomData_MeshMasks *cd_mask_extra, @@ -106,7 +113,8 @@ static void mesh_wrapper_ensure_mdata_isolated(void *userdata) me->runtime.wrapper_type = ME_WRAPPER_TYPE_MDATA; switch (geom_type_orig) { - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { break; /* Quiet warning. 
*/ } case ME_WRAPPER_TYPE_BMESH: { @@ -157,6 +165,7 @@ bool BKE_mesh_wrapper_minmax(const Mesh *me, float min[3], float max[3]) case ME_WRAPPER_TYPE_BMESH: return BKE_editmesh_cache_calc_minmax(me->edit_mesh, me->runtime.edit_data, min, max); case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return BKE_mesh_minmax(me, min, max); } BLI_assert_unreachable(); @@ -191,7 +200,8 @@ void BKE_mesh_wrapper_vert_coords_copy(const Mesh *me, } return; } - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { BLI_assert(vert_coords_len <= me->totvert); const MVert *mvert = me->mvert; for (int i = 0; i < vert_coords_len; i++) { @@ -228,7 +238,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const Mesh *me, } return; } - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { BLI_assert(vert_coords_len == me->totvert); const MVert *mvert = me->mvert; for (int i = 0; i < vert_coords_len; i++) { @@ -252,6 +263,7 @@ int BKE_mesh_wrapper_vert_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totvert; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totvert; } BLI_assert_unreachable(); @@ -264,6 +276,7 @@ int BKE_mesh_wrapper_edge_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totedge; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totedge; } BLI_assert_unreachable(); @@ -276,6 +289,7 @@ int BKE_mesh_wrapper_loop_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totloop; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totloop; } BLI_assert_unreachable(); @@ -288,6 +302,7 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totface; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totpoly; } BLI_assert_unreachable(); @@ -295,3 +310,67 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me) } /** \} */ + +/* 
-------------------------------------------------------------------- */ +/** \name CPU Subdivision Evaluation + * \{ */ + +Mesh *BKE_mesh_wrapper_ensure_subdivision(const Object *ob, Mesh *me) +{ + ThreadMutex *mesh_eval_mutex = (ThreadMutex *)me->runtime.eval_mutex; + BLI_mutex_lock(mesh_eval_mutex); + + if (me->runtime.wrapper_type == ME_WRAPPER_TYPE_SUBD) { + BLI_mutex_unlock(mesh_eval_mutex); + return me->runtime.mesh_eval; + } + + SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob); + if (!smd) { + BLI_mutex_unlock(mesh_eval_mutex); + return me; + } + + const bool apply_render = me->runtime.subsurf_apply_render; + + SubdivSettings subdiv_settings; + BKE_subsurf_modifier_subdiv_settings_init(&subdiv_settings, smd, apply_render); + if (subdiv_settings.level == 0) { + return me; + } + + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &subdiv_settings, me, false); + if (subdiv == NULL) { + /* Happens on bad topology, but also on empty input mesh. 
*/ + return me; + } + + SubdivToMeshSettings mesh_settings; + mesh_settings.resolution = me->runtime.subsurf_resolution; + mesh_settings.use_optimal_display = me->runtime.subsurf_use_optimal_display; + + if (mesh_settings.resolution < 3) { + return me; + } + + Mesh *subdiv_mesh = BKE_subdiv_to_mesh(subdiv, &mesh_settings, me); + + if (subdiv != runtime_data->subdiv) { + BKE_subdiv_free(subdiv); + } + + if (subdiv_mesh != me) { + if (me->runtime.mesh_eval != NULL) { + BKE_id_free(NULL, me->runtime.mesh_eval); + } + me->runtime.mesh_eval = subdiv_mesh; + me->runtime.wrapper_type = ME_WRAPPER_TYPE_SUBD; + } + + BLI_mutex_unlock(mesh_eval_mutex); + return me->runtime.mesh_eval; +} + +/** \} */ diff --git a/source/blender/blenkernel/intern/modifier.c b/source/blender/blenkernel/intern/modifier.c index e1d201d7806..f3b6c2544bf 100644 --- a/source/blender/blenkernel/intern/modifier.c +++ b/source/blender/blenkernel/intern/modifier.c @@ -970,6 +970,7 @@ static void modwrap_dependsOnNormals(Mesh *me) } break; } + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: BKE_mesh_calc_normals(me); break; diff --git a/source/blender/blenkernel/intern/multires_reshape_smooth.c b/source/blender/blenkernel/intern/multires_reshape_smooth.c index 3665d01926b..50b4410a28e 100644 --- a/source/blender/blenkernel/intern/multires_reshape_smooth.c +++ b/source/blender/blenkernel/intern/multires_reshape_smooth.c @@ -566,7 +566,8 @@ static bool foreach_topology_info(const SubdivForeachContext *foreach_context, const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons) + const int num_polygons, + const int *UNUSED(subdiv_polygon_offset)) { MultiresReshapeSmoothContext *reshape_smooth_context = foreach_context->user_data; const int max_edges = reshape_smooth_context->smoothing_type == MULTIRES_SUBDIVIDE_LINEAR ? 
@@ -1037,7 +1038,7 @@ static void reshape_subdiv_create(MultiresReshapeSmoothContext *reshape_smooth_c converter_init(reshape_smooth_context, &converter); Subdiv *reshape_subdiv = BKE_subdiv_new_from_converter(settings, &converter); - BKE_subdiv_eval_begin(reshape_subdiv); + BKE_subdiv_eval_begin(reshape_subdiv, SUBDIV_EVALUATOR_TYPE_CPU, NULL); reshape_smooth_context->reshape_subdiv = reshape_subdiv; diff --git a/source/blender/blenkernel/intern/multires_reshape_util.c b/source/blender/blenkernel/intern/multires_reshape_util.c index b7572204182..07a5d7c4a61 100644 --- a/source/blender/blenkernel/intern/multires_reshape_util.c +++ b/source/blender/blenkernel/intern/multires_reshape_util.c @@ -65,7 +65,7 @@ Subdiv *multires_reshape_create_subdiv(Depsgraph *depsgraph, SubdivSettings subdiv_settings; BKE_multires_subdiv_settings_init(&subdiv_settings, mmd); Subdiv *subdiv = BKE_subdiv_new_from_mesh(&subdiv_settings, base_mesh); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { BKE_subdiv_free(subdiv); return NULL; } diff --git a/source/blender/blenkernel/intern/multires_reshape_vertcos.c b/source/blender/blenkernel/intern/multires_reshape_vertcos.c index ed2df1ba8c5..c009349ff1b 100644 --- a/source/blender/blenkernel/intern/multires_reshape_vertcos.c +++ b/source/blender/blenkernel/intern/multires_reshape_vertcos.c @@ -114,7 +114,8 @@ static bool multires_reshape_vertcos_foreach_topology_info( const int num_vertices, const int UNUSED(num_edges), const int UNUSED(num_loops), - const int UNUSED(num_polygons)) + const int UNUSED(num_polygons), + const int *UNUSED(subdiv_polygon_offset)) { MultiresReshapeAssignVertcosContext *reshape_vertcos_context = foreach_context->user_data; if (num_vertices != reshape_vertcos_context->num_vert_coords) { diff --git a/source/blender/blenkernel/intern/multires_versioning.c 
b/source/blender/blenkernel/intern/multires_versioning.c index 4c0d7165cd0..18708c43f26 100644 --- a/source/blender/blenkernel/intern/multires_versioning.c +++ b/source/blender/blenkernel/intern/multires_versioning.c @@ -61,7 +61,7 @@ static Subdiv *subdiv_for_simple_to_catmull_clark(Object *object, MultiresModifi Subdiv *subdiv = BKE_subdiv_new_from_converter(&subdiv_settings, &converter); BKE_subdiv_converter_free(&converter); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { BKE_subdiv_free(subdiv); return NULL; } diff --git a/source/blender/blenkernel/intern/object.cc b/source/blender/blenkernel/intern/object.cc index 6cc6219b7d7..d08ea74d2c6 100644 --- a/source/blender/blenkernel/intern/object.cc +++ b/source/blender/blenkernel/intern/object.cc @@ -1773,8 +1773,9 @@ static void object_update_from_subsurf_ccg(Object *object) if (!object->runtime.is_data_eval_owned) { return; } - /* Object was never evaluated, so can not have CCG subdivision surface. */ - Mesh *mesh_eval = BKE_object_get_evaluated_mesh(object); + /* Object was never evaluated, so can not have CCG subdivision surface. If it were evaluated, do + * not try to compute OpenSubDiv on the CPU as it is not needed here. */ + Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(object); if (mesh_eval == nullptr) { return; } @@ -4496,7 +4497,7 @@ bool BKE_object_obdata_texspace_get(Object *ob, char **r_texflag, float **r_loc, return true; } -Mesh *BKE_object_get_evaluated_mesh(const Object *object) +Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const Object *object) { /* First attempt to retrieve the evaluated mesh from the evaluated geometry set. Most * object types either store it there or add a reference to it if it's owned elsewhere. 
*/ @@ -4523,6 +4524,20 @@ Mesh *BKE_object_get_evaluated_mesh(const Object *object) return nullptr; } +Mesh *BKE_object_get_evaluated_mesh(const Object *object) +{ + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(object); + if (!mesh) { + return nullptr; + } + + if (object->data && GS(((const ID *)object->data)->name) == ID_ME) { + mesh = BKE_mesh_wrapper_ensure_subdivision(object, mesh); + } + + return mesh; +} + Mesh *BKE_object_get_pre_modified_mesh(const Object *object) { if (object->type == OB_MESH && object->runtime.data_orig != nullptr) { @@ -5779,6 +5794,21 @@ void BKE_object_modifiers_lib_link_common(void *userData, } } +SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const Object *ob) +{ + ModifierData *md = (ModifierData *)(ob->modifiers.last); + + while (md) { + if (md->type == eModifierType_Subsurf) { + break; + } + + md = md->prev; + } + + return (SubsurfModifierData *)(md); +} + void BKE_object_replace_data_on_shallow_copy(Object *ob, ID *new_data) { ob->type = BKE_object_obdata_to_type(new_data); diff --git a/source/blender/blenkernel/intern/subdiv.c b/source/blender/blenkernel/intern/subdiv.c index fd32f52351a..45810e29565 100644 --- a/source/blender/blenkernel/intern/subdiv.c +++ b/source/blender/blenkernel/intern/subdiv.c @@ -29,6 +29,9 @@ #include "BLI_utildefines.h" +#include "BKE_modifier.h" +#include "BKE_subdiv_modifier.h" + #include "MEM_guardedalloc.h" #include "subdiv_converter.h" @@ -189,6 +192,12 @@ Subdiv *BKE_subdiv_update_from_mesh(Subdiv *subdiv, void BKE_subdiv_free(Subdiv *subdiv) { if (subdiv->evaluator != NULL) { + const eOpenSubdivEvaluator evaluator_type = subdiv->evaluator->type; + if (evaluator_type != OPENSUBDIV_EVALUATOR_CPU) { + /* Let the draw code do the freeing, to ensure that the OpenGL context is valid. 
*/ + BKE_subsurf_modifier_free_gpu_cache_cb(subdiv); + return; + } openSubdiv_deleteEvaluator(subdiv->evaluator); } if (subdiv->topology_refiner != NULL) { @@ -214,12 +223,13 @@ int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv) } const int num_coarse_faces = topology_refiner->getNumFaces(topology_refiner); subdiv->cache_.face_ptex_offset = MEM_malloc_arrayN( - num_coarse_faces, sizeof(int), "subdiv face_ptex_offset"); + num_coarse_faces + 1, sizeof(int), "subdiv face_ptex_offset"); int ptex_offset = 0; for (int face_index = 0; face_index < num_coarse_faces; face_index++) { const int num_ptex_faces = topology_refiner->getNumFacePtexFaces(topology_refiner, face_index); subdiv->cache_.face_ptex_offset[face_index] = ptex_offset; ptex_offset += num_ptex_faces; } + subdiv->cache_.face_ptex_offset[num_coarse_faces] = ptex_offset; return subdiv->cache_.face_ptex_offset; } diff --git a/source/blender/blenkernel/intern/subdiv_ccg.c b/source/blender/blenkernel/intern/subdiv_ccg.c index 77962ec924c..7d876acf776 100644 --- a/source/blender/blenkernel/intern/subdiv_ccg.c +++ b/source/blender/blenkernel/intern/subdiv_ccg.c @@ -603,7 +603,8 @@ Mesh *BKE_subdiv_to_ccg_mesh(Subdiv *subdiv, { /* Make sure evaluator is ready. 
*/ BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_CCG); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { if (coarse_mesh->totpoly) { return NULL; } diff --git a/source/blender/blenkernel/intern/subdiv_deform.c b/source/blender/blenkernel/intern/subdiv_deform.c index 7a2d639e4e5..c385b1b291d 100644 --- a/source/blender/blenkernel/intern/subdiv_deform.c +++ b/source/blender/blenkernel/intern/subdiv_deform.c @@ -117,7 +117,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex const int UNUSED(num_vertices), const int UNUSED(num_edges), const int UNUSED(num_loops), - const int UNUSED(num_polygons)) + const int UNUSED(num_polygons), + const int *UNUSED(subdiv_polygon_offset)) { SubdivDeformContext *subdiv_context = foreach_context->user_data; subdiv_mesh_prepare_accumulator(subdiv_context, subdiv_context->coarse_mesh->totvert); @@ -202,7 +203,8 @@ void BKE_subdiv_deform_coarse_vertices(struct Subdiv *subdiv, BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH); /* Make sure evaluator is up to date with possible new topology, and that * is refined for the new positions of coarse vertices. */ - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, vertex_cos)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, vertex_cos, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { /* This could happen in two situations: * - OpenSubdiv is disabled. 
* - Something totally bad happened, and OpenSubdiv rejected our diff --git a/source/blender/blenkernel/intern/subdiv_eval.c b/source/blender/blenkernel/intern/subdiv_eval.c index 0001eb8a205..9733a1498a6 100644 --- a/source/blender/blenkernel/intern/subdiv_eval.c +++ b/source/blender/blenkernel/intern/subdiv_eval.c @@ -28,6 +28,7 @@ #include "BLI_bitmap.h" #include "BLI_math_vector.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "BKE_customdata.h" @@ -38,7 +39,28 @@ #include "opensubdiv_evaluator_capi.h" #include "opensubdiv_topology_refiner_capi.h" -bool BKE_subdiv_eval_begin(Subdiv *subdiv) +/* ============================ Helper Function ============================ */ + +static eOpenSubdivEvaluator opensubdiv_evalutor_from_subdiv_evaluator_type( + eSubdivEvaluatorType evaluator_type) +{ + switch (evaluator_type) { + case SUBDIV_EVALUATOR_TYPE_CPU: { + return OPENSUBDIV_EVALUATOR_CPU; + } + case SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE: { + return OPENSUBDIV_EVALUATOR_GLSL_COMPUTE; + } + } + BLI_assert_msg(0, "Unknown evaluator type"); + return OPENSUBDIV_EVALUATOR_CPU; +} + +/* ====================== Main Subdivision Evaluation ====================== */ + +bool BKE_subdiv_eval_begin(Subdiv *subdiv, + eSubdivEvaluatorType evaluator_type, + OpenSubdiv_EvaluatorCache *evaluator_cache) { BKE_subdiv_stats_reset(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); if (subdiv->topology_refiner == NULL) { @@ -47,8 +69,11 @@ bool BKE_subdiv_eval_begin(Subdiv *subdiv) return false; } if (subdiv->evaluator == NULL) { + eOpenSubdivEvaluator opensubdiv_evaluator_type = + opensubdiv_evalutor_from_subdiv_evaluator_type(evaluator_type); BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); - subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner(subdiv->topology_refiner); + subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner( + subdiv->topology_refiner, opensubdiv_evaluator_type, evaluator_cache); 
BKE_subdiv_stats_end(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); if (subdiv->evaluator == NULL) { return false; @@ -80,6 +105,9 @@ static void set_coarse_positions(Subdiv *subdiv, BLI_BITMAP_ENABLE(vertex_used_map, loop->v); } } + /* Use a temporary buffer so we do not upload vertices one at a time to the GPU. */ + float(*buffer)[3] = MEM_mallocN(sizeof(float[3]) * mesh->totvert, "subdiv tmp coarse positions"); + int manifold_vertex_count = 0; for (int vertex_index = 0, manifold_vertex_index = 0; vertex_index < mesh->totvert; vertex_index++) { if (!BLI_BITMAP_TEST_BOOL(vertex_used_map, vertex_index)) { @@ -93,13 +121,49 @@ static void set_coarse_positions(Subdiv *subdiv, const MVert *vertex = &mvert[vertex_index]; vertex_co = vertex->co; } - subdiv->evaluator->setCoarsePositions(subdiv->evaluator, vertex_co, manifold_vertex_index, 1); + copy_v3_v3(&buffer[manifold_vertex_index][0], vertex_co); manifold_vertex_index++; + manifold_vertex_count++; } + subdiv->evaluator->setCoarsePositions( + subdiv->evaluator, &buffer[0][0], 0, manifold_vertex_count); MEM_freeN(vertex_used_map); + MEM_freeN(buffer); +} + +/* Context which is used to fill face varying data in parallel. 
*/ +typedef struct FaceVaryingDataFromUVContext { + OpenSubdiv_TopologyRefiner *topology_refiner; + const Mesh *mesh; + const MLoopUV *mloopuv; + float (*buffer)[2]; + int layer_index; +} FaceVaryingDataFromUVContext; + +static void set_face_varying_data_from_uv_task(void *__restrict userdata, + const int face_index, + const TaskParallelTLS *__restrict UNUSED(tls)) +{ + FaceVaryingDataFromUVContext *ctx = userdata; + OpenSubdiv_TopologyRefiner *topology_refiner = ctx->topology_refiner; + const int layer_index = ctx->layer_index; + const Mesh *mesh = ctx->mesh; + const MPoly *mpoly = &mesh->mpoly[face_index]; + const MLoopUV *mluv = &ctx->mloopuv[mpoly->loopstart]; + + /* TODO(sergey): OpenSubdiv's C-API converter can change winding of + * loops of a face, need to watch for that, to prevent wrong UVs assigned. + */ + const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner, face_index); + const int *uv_indices = topology_refiner->getFaceFVarValueIndices( + topology_refiner, face_index, layer_index); + for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) { + copy_v2_v2(ctx->buffer[uv_indices[vertex_index]], mluv->uv); + } } static void set_face_varying_data_from_uv(Subdiv *subdiv, + const Mesh *mesh, const MLoopUV *mloopuv, const int layer_index) { @@ -107,25 +171,37 @@ static void set_face_varying_data_from_uv(Subdiv *subdiv, OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; const int num_faces = topology_refiner->getNumFaces(topology_refiner); const MLoopUV *mluv = mloopuv; - /* TODO(sergey): OpenSubdiv's C-API converter can change winding of - * loops of a face, need to watch for that, to prevent wrong UVs assigned. 
- */ - for (int face_index = 0; face_index < num_faces; face_index++) { - const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner, - face_index); - const int *uv_indices = topology_refiner->getFaceFVarValueIndices( - topology_refiner, face_index, layer_index); - for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) { - evaluator->setFaceVaryingData(evaluator, layer_index, mluv->uv, uv_indices[vertex_index], 1); - } - } + + const int num_fvar_values = topology_refiner->getNumFVarValues(topology_refiner, layer_index); + /* Use a temporary buffer so we do not upload UVs one at a time to the GPU. */ + float(*buffer)[2] = MEM_mallocN(sizeof(float[2]) * num_fvar_values, "temp UV storage"); + + FaceVaryingDataFromUVContext ctx; + ctx.topology_refiner = topology_refiner; + ctx.layer_index = layer_index; + ctx.mloopuv = mluv; + ctx.mesh = mesh; + ctx.buffer = buffer; + + TaskParallelSettings parallel_range_settings; + BLI_parallel_range_settings_defaults(¶llel_range_settings); + parallel_range_settings.min_iter_per_thread = 1; + + BLI_task_parallel_range( + 0, num_faces, &ctx, set_face_varying_data_from_uv_task, ¶llel_range_settings); + + evaluator->setFaceVaryingData(evaluator, layer_index, &buffer[0][0], 0, num_fvar_values); + + MEM_freeN(buffer); } bool BKE_subdiv_eval_begin_from_mesh(Subdiv *subdiv, const Mesh *mesh, - const float (*coarse_vertex_cos)[3]) + const float (*coarse_vertex_cos)[3], + eSubdivEvaluatorType evaluator_type, + OpenSubdiv_EvaluatorCache *evaluator_cache) { - if (!BKE_subdiv_eval_begin(subdiv)) { + if (!BKE_subdiv_eval_begin(subdiv, evaluator_type, evaluator_cache)) { return false; } return BKE_subdiv_eval_refine_from_mesh(subdiv, mesh, coarse_vertex_cos); @@ -146,7 +222,7 @@ bool BKE_subdiv_eval_refine_from_mesh(Subdiv *subdiv, const int num_uv_layers = CustomData_number_of_layers(&mesh->ldata, CD_MLOOPUV); for (int layer_index = 0; layer_index < num_uv_layers; layer_index++) { const 
MLoopUV *mloopuv = CustomData_get_layer_n(&mesh->ldata, CD_MLOOPUV, layer_index); - set_face_varying_data_from_uv(subdiv, mloopuv, layer_index); + set_face_varying_data_from_uv(subdiv, mesh, mloopuv, layer_index); } /* Update evaluator to the new coarse geometry. */ BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_REFINE); diff --git a/source/blender/blenkernel/intern/subdiv_foreach.c b/source/blender/blenkernel/intern/subdiv_foreach.c index 061c196df2a..69bead27fe6 100644 --- a/source/blender/blenkernel/intern/subdiv_foreach.c +++ b/source/blender/blenkernel/intern/subdiv_foreach.c @@ -1877,7 +1877,8 @@ bool BKE_subdiv_foreach_subdiv_geometry(Subdiv *subdiv, ctx.num_subdiv_vertices, ctx.num_subdiv_edges, ctx.num_subdiv_loops, - ctx.num_subdiv_polygons)) { + ctx.num_subdiv_polygons, + ctx.subdiv_polygon_offset)) { subdiv_foreach_ctx_free(&ctx); return false; } diff --git a/source/blender/blenkernel/intern/subdiv_mesh.c b/source/blender/blenkernel/intern/subdiv_mesh.c index e5c7d13edab..1f31d0543ad 100644 --- a/source/blender/blenkernel/intern/subdiv_mesh.c +++ b/source/blender/blenkernel/intern/subdiv_mesh.c @@ -514,7 +514,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons) + const int num_polygons, + const int *UNUSED(subdiv_polygon_offset)) { /* Multires grid data will be applied or become invalid after subdivision, * so don't try to preserve it and use memory. */ @@ -1193,7 +1194,8 @@ Mesh *BKE_subdiv_to_mesh(Subdiv *subdiv, BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH); /* Make sure evaluator is up to date with possible new topology, and that * it is refined for the new positions of coarse vertices. 
*/ - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { /* This could happen in two situations: * - OpenSubdiv is disabled. * - Something totally bad happened, and OpenSubdiv rejected our diff --git a/source/blender/blenkernel/intern/subdiv_modifier.c b/source/blender/blenkernel/intern/subdiv_modifier.c new file mode 100644 index 00000000000..bafcb631f59 --- /dev/null +++ b/source/blender/blenkernel/intern/subdiv_modifier.c @@ -0,0 +1,162 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2021 by Blender Foundation. + * All rights reserved. + */ + +#include "BKE_subdiv_modifier.h" + +#include "MEM_guardedalloc.h" + +#include "DNA_mesh_types.h" +#include "DNA_modifier_types.h" +#include "DNA_object_types.h" +#include "DNA_scene_types.h" +#include "DNA_userdef_types.h" + +#include "BKE_modifier.h" +#include "BKE_subdiv.h" + +#include "GPU_capabilities.h" +#include "GPU_context.h" + +#include "opensubdiv_capi.h" + +void BKE_subsurf_modifier_subdiv_settings_init(SubdivSettings *settings, + const SubsurfModifierData *smd, + const bool use_render_params) +{ + const int requested_levels = (use_render_params) ? 
smd->renderLevels : smd->levels; + + settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE); + settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision); + settings->level = settings->is_simple ? + 1 : + (settings->is_adaptive ? smd->quality : requested_levels); + settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease); + settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf( + smd->boundary_smooth); + settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth( + smd->uv_smooth); +} + +static ModifierData *modifier_get_last_enabled_for_mode(const Scene *scene, + const Object *ob, + int required_mode) +{ + ModifierData *md = ob->modifiers.last; + + while (md) { + if (BKE_modifier_is_enabled(scene, md, required_mode)) { + break; + } + + md = md->prev; + } + + return md; +} + +bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const Scene *scene, + const Object *ob, + const SubsurfModifierData *smd, + int required_mode, + bool skip_check_is_last) +{ + if ((U.gpu_flag & USER_GPU_FLAG_SUBDIVISION_EVALUATION) == 0) { + return false; + } + + if (!skip_check_is_last) { + ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode); + if (md != (const ModifierData *)smd) { + return false; + } + } + + /* Only OpenGL is supported for OpenSubdiv evaluation for now. 
*/ + if (GPU_backend_get_type() != GPU_BACKEND_OPENGL) { + return false; + } + + if (!GPU_compute_shader_support()) { + return false; + } + + const int available_evaluators = openSubdiv_getAvailableEvaluators(); + if ((available_evaluators & OPENSUBDIV_EVALUATOR_GLSL_COMPUTE) == 0) { + return false; + } + + return true; +} + +bool BKE_subsurf_modifier_can_do_gpu_subdiv(const Scene *scene, + const Object *ob, + int required_mode) +{ + ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode); + + if (!md) { + return false; + } + + if (md->type != eModifierType_Subsurf) { + return false; + } + + return BKE_subsurf_modifier_can_do_gpu_subdiv_ex( + scene, ob, (SubsurfModifierData *)md, required_mode, true); +} + +void (*BKE_subsurf_modifier_free_gpu_cache_cb)(Subdiv *subdiv) = NULL; + +/* Main goal of this function is to give usable subdivision surface descriptor + * which matches settings and topology. */ +Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure(const SubsurfModifierData *smd, + const SubdivSettings *subdiv_settings, + const Mesh *mesh, + const bool for_draw_code) +{ + SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; + if (runtime_data->subdiv && runtime_data->set_by_draw_code != for_draw_code) { + BKE_subdiv_free(runtime_data->subdiv); + runtime_data->subdiv = NULL; + } + Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh); + runtime_data->subdiv = subdiv; + runtime_data->set_by_draw_code = for_draw_code; + return subdiv; +} + +SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(SubsurfModifierData *smd) +{ + SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; + if (runtime_data == NULL) { + runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime"); + smd->modifier.runtime = runtime_data; + } + return runtime_data; +} + +int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode) +{ + if 
(is_final_render) { + return eModifierMode_Render; + } + + return eModifierMode_Realtime | (is_edit_mode ? eModifierMode_Editmode : 0); +} diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 821b6025fff..eea3adc440a 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -44,9 +44,11 @@ set(INC ../../../intern/atomic ../../../intern/glew-mx ../../../intern/guardedalloc + ../../../intern/opensubdiv # dna_type_offsets.h ${CMAKE_CURRENT_BINARY_DIR}/../makesdna/intern + ${OPENSUBDIV_INCLUDE_DIRS} ) set(SRC @@ -91,6 +93,7 @@ set(SRC intern/draw_cache_impl_metaball.c intern/draw_cache_impl_particles.c intern/draw_cache_impl_pointcloud.c + intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc intern/draw_common.c @@ -209,6 +212,7 @@ set(SRC intern/draw_manager_testing.h intern/draw_manager_text.h intern/draw_shader.h + intern/draw_subdivision.h intern/draw_texture_pool.h intern/draw_view.h intern/draw_view_data.h @@ -372,6 +376,18 @@ data_to_c_simple(intern/shaders/common_view_lib.glsl SRC) data_to_c_simple(intern/shaders/common_fxaa_lib.glsl SRC) data_to_c_simple(intern/shaders/common_smaa_lib.glsl SRC) data_to_c_simple(intern/shaders/common_fullscreen_vert.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_custom_data_interp_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_ibo_lines_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_ibo_tris_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_lib.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_normals_accumulate_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_normals_finalize_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_patch_evaluation_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl SRC) 
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_lnor_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl SRC) data_to_c_simple(engines/gpencil/shaders/gpencil_frag.glsl SRC) data_to_c_simple(engines/gpencil/shaders/gpencil_vert.glsl SRC) diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h index 98e166ac3a7..132f66ecb1e 100644 --- a/source/blender/draw/DRW_engine.h +++ b/source/blender/draw/DRW_engine.h @@ -191,6 +191,10 @@ void DRW_xr_drawing_end(void); /* For garbage collection */ void DRW_cache_free_old_batches(struct Main *bmain); +void DRW_cache_free_old_subdiv(void); + +/* For the OpenGL evaluators and garbage collected subdivision data. */ +void DRW_subdiv_free(void); /* Never use this. Only for closing blender. */ void DRW_opengl_context_enable_ex(bool restore); diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c index 2345a110134..a754e81b949 100644 --- a/source/blender/draw/engines/overlay/overlay_armature.c +++ b/source/blender/draw/engines/overlay/overlay_armature.c @@ -589,7 +589,7 @@ static void drw_shgroup_bone_custom_wire(ArmatureDrawContext *ctx, Object *custom) { /* See comments in #drw_shgroup_bone_custom_solid. 
*/ - Mesh *mesh = BKE_object_get_evaluated_mesh(custom); + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(custom); if (mesh == NULL) { return; } diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 03fb3b92277..1110658e3b2 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -923,7 +923,7 @@ GPUBatch *DRW_cache_object_surface_get(Object *ob) GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob) { - Mesh *me = BKE_object_get_evaluated_mesh(ob); + Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob); short type = (me != NULL) ? OB_MESH : ob->type; switch (type) { @@ -950,7 +950,7 @@ int DRW_cache_object_material_count_get(struct Object *ob) { short type = ob->type; - Mesh *me = BKE_object_get_evaluated_mesh(ob); + Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (me != NULL && type != OB_POINTCLOUD) { /* Some object types can have one data type in ob->data, but will be rendered as mesh. * For point clouds this never happens. 
Ideally this check would happen at another level @@ -3021,7 +3021,7 @@ GPUBatch *DRW_cache_surf_surface_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_surface(mesh_eval); } @@ -3034,7 +3034,7 @@ GPUBatch *DRW_cache_surf_edge_wire_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_loose_edges(mesh_eval); } @@ -3047,7 +3047,7 @@ GPUBatch *DRW_cache_surf_face_wireframe_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_wireframes_face(mesh_eval); } @@ -3059,7 +3059,7 @@ GPUBatch *DRW_cache_surf_edge_detection_get(Object *ob, bool *r_is_manifold) { BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_edge_detection(mesh_eval, r_is_manifold); } @@ -3072,7 +3072,7 @@ GPUBatch *DRW_cache_surf_loose_edges_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_loose_edges(mesh_eval); } @@ -3089,7 +3089,7 @@ GPUBatch **DRW_cache_surf_surface_shaded_get(Object *ob, BLI_assert(ob->type == OB_SURF); struct Curve *cu = 
ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_surface_shaded(mesh_eval, gpumat_array, gpumat_array_len); } @@ -3382,7 +3382,7 @@ GPUBatch *DRW_cache_cursor_get(bool crosshair_lines) void drw_batch_cache_validate(Object *ob) { - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: DRW_mesh_batch_cache_validate((Mesh *)ob->data); @@ -3431,7 +3431,7 @@ void drw_batch_cache_generate_requested(Object *ob) DRW_object_use_hide_faces(ob)) || ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob)))); - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: DRW_mesh_batch_cache_create_requested( @@ -3470,7 +3470,7 @@ void drw_batch_cache_generate_requested_evaluated_mesh(Object *ob) DRW_object_use_hide_faces(ob)) || ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob)))); - Mesh *mesh = BKE_object_get_evaluated_mesh(ob); + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(ob); DRW_mesh_batch_cache_create_requested(DST.task_graph, ob, mesh, scene, is_paint_mode, use_hide); } @@ -3481,7 +3481,7 @@ void drw_batch_cache_generate_requested_delayed(Object *ob) void DRW_batch_cache_free_old(Object *ob, int ctime) { - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h index ba42cdf66e7..6de9788b434 100644 --- a/source/blender/draw/intern/draw_cache_extract.h +++ b/source/blender/draw/intern/draw_cache_extract.h @@ -22,6 +22,7 @@ #pragma once +struct 
DRWSubdivCache; struct TaskGraph; #include "DNA_customdata_types.h" @@ -244,6 +245,13 @@ typedef enum DRWBatchFlag { BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of bit fields"); +typedef struct MeshExtractLooseGeom { + int edge_len; + int vert_len; + int *verts; + int *edges; +} MeshExtractLooseGeom; + /** * Data that are kept around between extractions to reduce rebuilding time. * @@ -252,12 +260,7 @@ BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of b typedef struct MeshBufferCache { MeshBufferList buff; - struct { - int edge_len; - int vert_len; - int *verts; - int *edges; - } loose_geom; + MeshExtractLooseGeom loose_geom; struct { int *tri_first_index; @@ -283,6 +286,8 @@ typedef struct MeshBatchCache { GPUBatch **surface_per_mat; + struct DRWSubdivCache *subdiv_cache; + DRWBatchFlag batch_requested; /* DRWBatchFlag */ DRWBatchFlag batch_ready; /* DRWBatchFlag */ @@ -332,9 +337,14 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, const bool do_uvedit, const bool use_subsurf_fdots, const Scene *scene, - const ToolSettings *ts, + const struct ToolSettings *ts, const bool use_hide); +void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + struct DRWSubdivCache *subdiv_cache, + const struct ToolSettings *ts); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 485b803310c..383a3b05b67 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -42,6 +42,7 @@ #include "draw_cache_extract.h" #include "draw_cache_inline.h" +#include "draw_subdivision.h" #include "mesh_extractors/extract_mesh.h" @@ -783,6 +784,99 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, /** \} */ +/* 
---------------------------------------------------------------------- */ +/** \name Subdivision Extract Loop + * \{ */ + +static void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + DRWSubdivCache *subdiv_cache, + const ToolSettings *ts) +{ + /* Create an array containing all the extractors that needs to be executed. */ + ExtractorRunDatas extractors; + + MeshBufferList *mbuflist = &mbc->buff; + +#define EXTRACT_ADD_REQUESTED(type, name) \ + do { \ + if (DRW_##type##_requested(mbuflist->type.name)) { \ + const MeshExtract *extractor = &extract_##name; \ + extractors.append(extractor); \ + } \ + } while (0) + + /* The order in which extractors are added to the list matters somewhat, as some buffers are + * reused when building others. */ + EXTRACT_ADD_REQUESTED(ibo, tris); + EXTRACT_ADD_REQUESTED(vbo, pos_nor); + EXTRACT_ADD_REQUESTED(vbo, lnor); + for (int i = 0; i < GPU_MAX_ATTR; i++) { + EXTRACT_ADD_REQUESTED(vbo, attr[i]); + } + + /* We use only one extractor for face dots, as the work is done in a single compute shader. */ + if (DRW_vbo_requested(mbuflist->vbo.fdots_nor) || DRW_vbo_requested(mbuflist->vbo.fdots_pos) || + DRW_ibo_requested(mbuflist->ibo.fdots)) { + extractors.append(&extract_fdots_pos); + } + + EXTRACT_ADD_REQUESTED(ibo, lines); + EXTRACT_ADD_REQUESTED(ibo, edituv_points); + EXTRACT_ADD_REQUESTED(ibo, edituv_tris); + EXTRACT_ADD_REQUESTED(ibo, edituv_lines); + EXTRACT_ADD_REQUESTED(vbo, vert_idx); + EXTRACT_ADD_REQUESTED(vbo, edge_idx); + EXTRACT_ADD_REQUESTED(vbo, poly_idx); + EXTRACT_ADD_REQUESTED(vbo, edge_fac); + EXTRACT_ADD_REQUESTED(ibo, points); + EXTRACT_ADD_REQUESTED(vbo, edit_data); + EXTRACT_ADD_REQUESTED(vbo, edituv_data); + /* Make sure UVs are computed before edituv stuffs. 
*/ + EXTRACT_ADD_REQUESTED(vbo, uv); + EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_area); + EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle); + EXTRACT_ADD_REQUESTED(ibo, lines_adjacency); + EXTRACT_ADD_REQUESTED(vbo, vcol); + EXTRACT_ADD_REQUESTED(vbo, weights); + EXTRACT_ADD_REQUESTED(vbo, sculpt_data); + +#undef EXTRACT_ADD_REQUESTED + + if (extractors.is_empty()) { + return; + } + + MeshRenderData mr; + draw_subdiv_init_mesh_render_data(subdiv_cache, &mr, ts); + mesh_render_data_update_loose_geom(&mr, mbc, MR_ITER_LEDGE | MR_ITER_LVERT, MR_DATA_LOOSE_GEOM); + + void *data_stack = MEM_mallocN(extractors.data_size_total(), __func__); + uint32_t data_offset = 0; + for (const ExtractorRunData &run_data : extractors) { + const MeshExtract *extractor = run_data.extractor; + void *buffer = mesh_extract_buffer_get(extractor, mbuflist); + void *data = POINTER_OFFSET(data_stack, data_offset); + + extractor->init_subdiv(subdiv_cache, &mr, cache, buffer, data); + + if (extractor->iter_subdiv) { + extractor->iter_subdiv(subdiv_cache, &mr, data); + } + + if (extractor->iter_loose_geom_subdiv) { + extractor->iter_loose_geom_subdiv(subdiv_cache, &mr, &mbc->loose_geom, buffer, data); + } + + if (extractor->finish_subdiv) { + extractor->finish_subdiv(subdiv_cache, buffer, data); + } + } + MEM_freeN(data_stack); +} + +/** \} */ + } // namespace blender::draw extern "C" { @@ -818,4 +912,12 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, use_hide); } +void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + DRWSubdivCache *subdiv_cache, + const ToolSettings *ts) +{ + blender::draw::mesh_buffer_cache_create_requested_subdiv(cache, mbc, subdiv_cache, ts); +} + } // extern "C" diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c index 82b3b5aee41..1e5ffc14911 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.c +++ 
b/source/blender/draw/intern/draw_cache_impl_mesh.c @@ -54,6 +54,7 @@ #include "BKE_object_deform.h" #include "BKE_paint.h" #include "BKE_pbvh.h" +#include "BKE_subdiv_modifier.h" #include "atomic_ops.h" @@ -69,6 +70,7 @@ #include "draw_cache_extract.h" #include "draw_cache_inline.h" +#include "draw_subdivision.h" #include "draw_cache_impl.h" /* own include */ @@ -380,6 +382,7 @@ static void drw_mesh_attributes_add_request(DRW_MeshAttributes *attrs, BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->ldata; break; @@ -395,6 +398,7 @@ BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->pdata; break; @@ -410,6 +414,7 @@ BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->edata; break; @@ -425,6 +430,7 @@ BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->vdata; break; @@ -1037,6 +1043,15 @@ static void mesh_buffer_cache_clear(MeshBufferCache *mbc) mbc->poly_sorted.visible_tri_len = 0; } +static void mesh_batch_cache_free_subdiv_cache(MeshBatchCache *cache) +{ + if (cache->subdiv_cache) { + draw_subdiv_cache_free(cache->subdiv_cache); + MEM_freeN(cache->subdiv_cache); + cache->subdiv_cache = NULL; + } +} + static void mesh_batch_cache_clear(Mesh *me) { MeshBatchCache *cache = 
me->runtime.batch_cache; @@ -1064,6 +1079,8 @@ static void mesh_batch_cache_clear(Mesh *me) cache->batch_ready = 0; drw_mesh_weight_state_clear(&cache->weight_state); + + mesh_batch_cache_free_subdiv_cache(cache); } void DRW_mesh_batch_cache_free(Mesh *me) @@ -1693,6 +1710,10 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, const bool do_uvcage = is_editmode && !me->edit_mesh->mesh_eval_final->runtime.is_original; + const int required_mode = BKE_subsurf_modifier_eval_required_mode(DRW_state_is_scene_render(), + is_editmode); + const bool do_subdivision = BKE_subsurf_modifier_can_do_gpu_subdiv(scene, ob, required_mode); + MeshBufferList *mbuflist = &cache->final.buff; /* Initialize batches and request VBO's & IBO's. */ @@ -2038,6 +2059,15 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, true); } + if (do_subdivision) { + DRW_create_subdivision(scene, ob, me, cache, &cache->final, ts); + } + else { + /* The subsurf modifier may have been recently removed, or another modifier was added after it, + * so free any potential subdivision cache as it is not needed anymore. */ + mesh_batch_cache_free_subdiv_cache(cache); + } + mesh_buffer_cache_create_requested(task_graph, cache, &cache->final, diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc new file mode 100644 index 00000000000..5533130212e --- /dev/null +++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc @@ -0,0 +1,1932 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#include "draw_subdivision.h" + +#include "DNA_mesh_types.h" +#include "DNA_object_types.h" +#include "DNA_scene_types.h" + +#include "BKE_editmesh.h" +#include "BKE_modifier.h" +#include "BKE_object.h" +#include "BKE_scene.h" +#include "BKE_subdiv.h" +#include "BKE_subdiv_eval.h" +#include "BKE_subdiv_foreach.h" +#include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" + +#include "BLI_linklist.h" + +#include "BLI_string.h" + +#include "PIL_time.h" + +#include "DRW_engine.h" +#include "DRW_render.h" + +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_index_buffer.h" +#include "GPU_state.h" +#include "GPU_vertex_buffer.h" + +#include "opensubdiv_capi.h" +#include "opensubdiv_capi_type.h" +#include "opensubdiv_converter_capi.h" +#include "opensubdiv_evaluator_capi.h" +#include "opensubdiv_topology_refiner_capi.h" + +#include "draw_cache_extract.h" +#include "draw_cache_impl.h" +#include "draw_cache_inline.h" +#include "mesh_extractors/extract_mesh.h" + +extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[]; +extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[]; +extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[]; +extern "C" char datatoc_common_subdiv_lib_glsl[]; +extern "C" char datatoc_common_subdiv_normals_accumulate_comp_glsl[]; +extern "C" char datatoc_common_subdiv_normals_finalize_comp_glsl[]; +extern "C" char datatoc_common_subdiv_patch_evaluation_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_edge_fac_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_lnor_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_sculpt_data_comp_glsl[]; +extern "C" char 
datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl[]; + +enum { + SHADER_BUFFER_LINES, + SHADER_BUFFER_LINES_LOOSE, + SHADER_BUFFER_EDGE_FAC, + SHADER_BUFFER_LNOR, + SHADER_BUFFER_TRIS, + SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS, + SHADER_BUFFER_NORMALS_ACCUMULATE, + SHADER_BUFFER_NORMALS_FINALIZE, + SHADER_PATCH_EVALUATION, + SHADER_PATCH_EVALUATION_LIMIT_NORMALS, + SHADER_PATCH_EVALUATION_FVAR, + SHADER_PATCH_EVALUATION_FACE_DOTS, + SHADER_COMP_CUSTOM_DATA_INTERP_1D, + SHADER_COMP_CUSTOM_DATA_INTERP_2D, + SHADER_COMP_CUSTOM_DATA_INTERP_3D, + SHADER_COMP_CUSTOM_DATA_INTERP_4D, + SHADER_BUFFER_SCULPT_DATA, + SHADER_BUFFER_UV_STRETCH_ANGLE, + SHADER_BUFFER_UV_STRETCH_AREA, + + NUM_SHADERS, +}; + +static GPUShader *g_subdiv_shaders[NUM_SHADERS]; + +static const char *get_shader_code(int shader_type) +{ + switch (shader_type) { + case SHADER_BUFFER_LINES: + case SHADER_BUFFER_LINES_LOOSE: { + return datatoc_common_subdiv_ibo_lines_comp_glsl; + } + case SHADER_BUFFER_EDGE_FAC: { + return datatoc_common_subdiv_vbo_edge_fac_comp_glsl; + } + case SHADER_BUFFER_LNOR: { + return datatoc_common_subdiv_vbo_lnor_comp_glsl; + } + case SHADER_BUFFER_TRIS: + case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: { + return datatoc_common_subdiv_ibo_tris_comp_glsl; + } + case SHADER_BUFFER_NORMALS_ACCUMULATE: { + return datatoc_common_subdiv_normals_accumulate_comp_glsl; + } + case SHADER_BUFFER_NORMALS_FINALIZE: { + return datatoc_common_subdiv_normals_finalize_comp_glsl; + } + case SHADER_PATCH_EVALUATION: + case SHADER_PATCH_EVALUATION_LIMIT_NORMALS: + case SHADER_PATCH_EVALUATION_FVAR: + case SHADER_PATCH_EVALUATION_FACE_DOTS: { + return datatoc_common_subdiv_patch_evaluation_comp_glsl; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_1D: + case SHADER_COMP_CUSTOM_DATA_INTERP_2D: + case SHADER_COMP_CUSTOM_DATA_INTERP_3D: + case SHADER_COMP_CUSTOM_DATA_INTERP_4D: { + return 
datatoc_common_subdiv_custom_data_interp_comp_glsl; + } + case SHADER_BUFFER_SCULPT_DATA: { + return datatoc_common_subdiv_vbo_sculpt_data_comp_glsl; + } + case SHADER_BUFFER_UV_STRETCH_ANGLE: { + return datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl; + } + case SHADER_BUFFER_UV_STRETCH_AREA: { + return datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl; + } + } + return nullptr; +} + +static const char *get_shader_name(int shader_type) +{ + switch (shader_type) { + case SHADER_BUFFER_LINES: { + return "subdiv lines build"; + } + case SHADER_BUFFER_LINES_LOOSE: { + return "subdiv lines loose build"; + } + case SHADER_BUFFER_LNOR: { + return "subdiv lnor build"; + } + case SHADER_BUFFER_EDGE_FAC: { + return "subdiv edge fac build"; + } + case SHADER_BUFFER_TRIS: + case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: { + return "subdiv tris"; + } + case SHADER_BUFFER_NORMALS_ACCUMULATE: { + return "subdiv normals accumulate"; + } + case SHADER_BUFFER_NORMALS_FINALIZE: { + return "subdiv normals finalize"; + } + case SHADER_PATCH_EVALUATION: { + return "subdiv patch evaluation"; + } + case SHADER_PATCH_EVALUATION_LIMIT_NORMALS: { + return "subdiv patch evaluation limit normals"; + } + case SHADER_PATCH_EVALUATION_FVAR: { + return "subdiv patch evaluation face-varying"; + } + case SHADER_PATCH_EVALUATION_FACE_DOTS: { + return "subdiv patch evaluation face dots"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_1D: { + return "subdiv custom data interp 1D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_2D: { + return "subdiv custom data interp 2D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_3D: { + return "subdiv custom data interp 3D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_4D: { + return "subdiv custom data interp 4D"; + } + case SHADER_BUFFER_SCULPT_DATA: { + return "subdiv sculpt data"; + } + case SHADER_BUFFER_UV_STRETCH_ANGLE: { + return "subdiv uv stretch angle"; + } + case SHADER_BUFFER_UV_STRETCH_AREA: { + return "subdiv uv stretch area"; + } + } + return nullptr; 
+} + +static GPUShader *get_patch_evaluation_shader(int shader_type) +{ + if (g_subdiv_shaders[shader_type] == nullptr) { + const char *compute_code = get_shader_code(shader_type); + + const char *defines = nullptr; + if (shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define LIMIT_NORMALS\n"; + } + else if (shader_type == SHADER_PATCH_EVALUATION_FVAR) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define FVAR_EVALUATION\n"; + } + else if (shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define FDOTS_EVALUATION\n"; + } + else { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"; + } + + /* Merge OpenSubdiv library code with our own library code. */ + const char *patch_basis_source = openSubdiv_getGLSLPatchBasisSource(); + const char *subdiv_lib_code = datatoc_common_subdiv_lib_glsl; + char *library_code = static_cast<char *>( + MEM_mallocN(strlen(patch_basis_source) + strlen(subdiv_lib_code) + 1, + "subdiv patch evaluation library code")); + library_code[0] = '\0'; + strcat(library_code, patch_basis_source); + strcat(library_code, subdiv_lib_code); + + g_subdiv_shaders[shader_type] = GPU_shader_create_compute( + compute_code, library_code, defines, get_shader_name(shader_type)); + + MEM_freeN(library_code); + } + + return g_subdiv_shaders[shader_type]; +} + +static GPUShader *get_subdiv_shader(int shader_type, const char *defines) +{ + if (shader_type == SHADER_PATCH_EVALUATION || + shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS || + shader_type == SHADER_PATCH_EVALUATION_FVAR || + shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) { + return get_patch_evaluation_shader(shader_type); + } + if 
(g_subdiv_shaders[shader_type] == nullptr) { + const char *compute_code = get_shader_code(shader_type); + g_subdiv_shaders[shader_type] = GPU_shader_create_compute( + compute_code, datatoc_common_subdiv_lib_glsl, defines, get_shader_name(shader_type)); + } + return g_subdiv_shaders[shader_type]; +} + +/* -------------------------------------------------------------------- */ +/** Vertex formats used for data transfer from OpenSubdiv, and for data processing on our side. + * \{ */ + +static GPUVertFormat *get_uvs_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "uvs", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + } + return &format; +} + +/* Vertex format for `OpenSubdiv::Osd::PatchArray`. */ +static GPUVertFormat *get_patch_array_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "regDesc", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "desc", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "numPatches", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "indexBase", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "stride", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "primitiveIdBase", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format used for the `PatchTable::PatchHandle`. */ +static GPUVertFormat *get_patch_handle_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "vertex_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "array_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "patch_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format used for the quad-tree nodes of the PatchMap. 
*/ +static GPUVertFormat *get_quadtree_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "child", GPU_COMP_U32, 4, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format for `OpenSubdiv::Osd::PatchParam`, not really used, it is only for making sure + * that the #GPUVertBuf used to wrap the OpenSubdiv patch param buffer is valid. */ +static GPUVertFormat *get_patch_param_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + } + return &format; +} + +/* Vertex format for the patches' vertices index buffer. */ +static GPUVertFormat *get_patch_index_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format for the OpenSubdiv vertex buffer. */ +static GPUVertFormat *get_subdiv_vertex_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* We use 4 components for the vectors to account for padding in the compute shaders, where + * vec3 is promoted to vec4. */ + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + } + return &format; +} + +typedef struct CompressedPatchCoord { + int ptex_face_index; + /* UV coordinate encoded as u << 16 | v, where u and v are quantized on 16-bits. */ + unsigned int encoded_uv; +} CompressedPatchCoord; + +MINLINE CompressedPatchCoord make_patch_coord(int ptex_face_index, float u, float v) +{ + CompressedPatchCoord patch_coord = { + ptex_face_index, + (static_cast<unsigned int>(u * 65535.0f) << 16) | static_cast<unsigned int>(v * 65535.0f), + }; + return patch_coord; +} + +/* Vertex format used for the #CompressedPatchCoord. 
*/ +static GPUVertFormat *get_blender_patch_coords_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* WARNING! Adjust #CompressedPatchCoord accordingly. */ + GPU_vertformat_attr_add(&format, "ptex_face_index", GPU_COMP_U32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "uv", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + return &format; +} + +static GPUVertFormat *get_origindex_format(void) +{ + static GPUVertFormat format; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "color", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + return &format; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Utilities to initialize a OpenSubdiv_Buffer for a GPUVertBuf. + * \{ */ + +static void vertbuf_bind_gpu(const OpenSubdiv_Buffer *buffer) +{ + GPUVertBuf *verts = (GPUVertBuf *)(buffer->data); + GPU_vertbuf_use(verts); +} + +static void *vertbuf_alloc(const OpenSubdiv_Buffer *interface, const uint len) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_data_alloc(verts, len); + return GPU_vertbuf_get_data(verts); +} + +static void vertbuf_device_alloc(const OpenSubdiv_Buffer *interface, const uint len) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + /* This assumes that GPU_USAGE_DEVICE_ONLY was used, which won't allocate host memory. 
*/ + // BLI_assert(GPU_vertbuf_get_usage(verts) == GPU_USAGE_DEVICE_ONLY); + GPU_vertbuf_data_alloc(verts, len); +} + +static void vertbuf_wrap_device_handle(const OpenSubdiv_Buffer *interface, uint64_t handle) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_wrap_handle(verts, handle); +} + +static void vertbuf_update_data(const OpenSubdiv_Buffer *interface, + uint start, + uint len, + const void *data) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_update_sub(verts, start, len, data); +} + +static void opensubdiv_gpu_buffer_init(OpenSubdiv_Buffer *buffer_interface, GPUVertBuf *vertbuf) +{ + buffer_interface->data = vertbuf; + buffer_interface->bind_gpu = vertbuf_bind_gpu; + buffer_interface->buffer_offset = 0; + buffer_interface->wrap_device_handle = vertbuf_wrap_device_handle; + buffer_interface->alloc = vertbuf_alloc; + buffer_interface->device_alloc = vertbuf_device_alloc; + buffer_interface->device_update = vertbuf_update_data; +} + +static GPUVertBuf *create_buffer_and_interface(OpenSubdiv_Buffer *interface, GPUVertFormat *format) +{ + GPUVertBuf *buffer = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(buffer, format, GPU_USAGE_DEVICE_ONLY); + opensubdiv_gpu_buffer_init(interface, buffer); + return buffer; +} + +/** \} */ + +// -------------------------------------------------------- + +static uint tris_count_from_number_of_loops(const uint number_of_loops) +{ + const uint32_t number_of_quads = number_of_loops / 4; + return number_of_quads * 2; +} + +/* -------------------------------------------------------------------- */ +/** \name Utilities to build a GPUVertBuf from an origindex buffer. 
+ * \{ */ + +void draw_subdiv_init_origindex_buffer(GPUVertBuf *buffer, + int *vert_origindex, + uint num_loops, + uint loose_len) +{ + GPU_vertbuf_init_with_format_ex(buffer, get_origindex_format(), GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(buffer, num_loops + loose_len); + + int *vbo_data = (int *)GPU_vertbuf_get_data(buffer); + memcpy(vbo_data, vert_origindex, num_loops * sizeof(int)); +} + +GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops) +{ + GPUVertBuf *buffer = GPU_vertbuf_calloc(); + draw_subdiv_init_origindex_buffer(buffer, vert_origindex, num_loops, 0); + return buffer; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Utilities for DRWPatchMap. + * \{ */ + +static void draw_patch_map_build(DRWPatchMap *gpu_patch_map, Subdiv *subdiv) +{ + GPUVertBuf *patch_map_handles = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(patch_map_handles, get_patch_handle_format(), GPU_USAGE_STATIC); + + GPUVertBuf *patch_map_quadtree = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(patch_map_quadtree, get_quadtree_format(), GPU_USAGE_STATIC); + + OpenSubdiv_Buffer patch_map_handles_interface; + opensubdiv_gpu_buffer_init(&patch_map_handles_interface, patch_map_handles); + + OpenSubdiv_Buffer patch_map_quad_tree_interface; + opensubdiv_gpu_buffer_init(&patch_map_quad_tree_interface, patch_map_quadtree); + + int min_patch_face = 0; + int max_patch_face = 0; + int max_depth = 0; + int patches_are_triangular = 0; + + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + evaluator->getPatchMap(evaluator, + &patch_map_handles_interface, + &patch_map_quad_tree_interface, + &min_patch_face, + &max_patch_face, + &max_depth, + &patches_are_triangular); + + gpu_patch_map->patch_map_handles = patch_map_handles; + gpu_patch_map->patch_map_quadtree = patch_map_quadtree; + gpu_patch_map->min_patch_face = min_patch_face; + gpu_patch_map->max_patch_face = max_patch_face; + 
gpu_patch_map->max_depth = max_depth; + gpu_patch_map->patches_are_triangular = patches_are_triangular; +} + +static void draw_patch_map_free(DRWPatchMap *gpu_patch_map) +{ + GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_handles); + GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_quadtree); + gpu_patch_map->min_patch_face = 0; + gpu_patch_map->max_patch_face = 0; + gpu_patch_map->max_depth = 0; + gpu_patch_map->patches_are_triangular = 0; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivCache + * \{ */ + +static void draw_subdiv_cache_free_material_data(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->polygon_mat_offset); + MEM_SAFE_FREE(cache->mat_start); + MEM_SAFE_FREE(cache->mat_end); +} + +static void draw_subdiv_free_edit_mode_cache(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->verts_orig_index); + GPU_VERTBUF_DISCARD_SAFE(cache->edges_orig_index); + GPU_VERTBUF_DISCARD_SAFE(cache->fdots_patch_coords); +} + +void draw_subdiv_cache_free(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->patch_coords); + GPU_VERTBUF_DISCARD_SAFE(cache->face_ptex_offset_buffer); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_polygon_offset_buffer); + GPU_VERTBUF_DISCARD_SAFE(cache->extra_coarse_face_data); + MEM_SAFE_FREE(cache->subdiv_loop_subdiv_vert_index); + MEM_SAFE_FREE(cache->subdiv_loop_poly_index); + MEM_SAFE_FREE(cache->point_indices); + MEM_SAFE_FREE(cache->subdiv_polygon_offset); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency_offsets); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency); + cache->resolution = 0; + cache->num_subdiv_loops = 0; + cache->num_coarse_poly = 0; + cache->num_subdiv_quads = 0; + draw_subdiv_free_edit_mode_cache(cache); + draw_subdiv_cache_free_material_data(cache); + draw_patch_map_free(&cache->gpu_patch_map); + if (cache->ubo) { + GPU_uniformbuf_free(cache->ubo); + cache->ubo = nullptr; + } +} + +/* Flags used in 
#DRWSubdivCache.extra_coarse_face_data. The flags are packed in the upper bits of + * each uint (one per coarse face), #SUBDIV_COARSE_FACE_FLAG_OFFSET tells where they are in the + * packed bits. */ +#define SUBDIV_COARSE_FACE_FLAG_SMOOTH 1u +#define SUBDIV_COARSE_FACE_FLAG_SELECT 2u +#define SUBDIV_COARSE_FACE_FLAG_ACTIVE 4u + +#define SUBDIV_COARSE_FACE_FLAG_OFFSET 29u + +#define SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK \ + (SUBDIV_COARSE_FACE_FLAG_SMOOTH << SUBDIV_COARSE_FACE_FLAG_OFFSET) +#define SUBDIV_COARSE_FACE_FLAG_SELECT_MASK \ + (SUBDIV_COARSE_FACE_FLAG_SELECT << SUBDIV_COARSE_FACE_FLAG_OFFSET) +#define SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK \ + (SUBDIV_COARSE_FACE_FLAG_ACTIVE << SUBDIV_COARSE_FACE_FLAG_OFFSET) + +#define SUBDIV_COARSE_FACE_LOOP_START_MASK \ + ~((SUBDIV_COARSE_FACE_FLAG_SMOOTH | SUBDIV_COARSE_FACE_FLAG_SELECT | \ + SUBDIV_COARSE_FACE_FLAG_ACTIVE) \ + << SUBDIV_COARSE_FACE_FLAG_OFFSET) + +static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cache, Mesh *mesh) +{ + if (cache->extra_coarse_face_data == nullptr) { + cache->extra_coarse_face_data = GPU_vertbuf_calloc(); + static GPUVertFormat format; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + GPU_vertbuf_init_with_format_ex(cache->extra_coarse_face_data, &format, GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->extra_coarse_face_data, mesh->totpoly); + } + + uint32_t *flags_data = (uint32_t *)(GPU_vertbuf_get_data(cache->extra_coarse_face_data)); + + if (cache->bm) { + BMesh *bm = cache->bm; + BMFace *f; + BMIter iter; + + /* Ensure all current elements follow new customdata layout. 
*/ + BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) { + const int index = BM_elem_index_get(f); + uint32_t flag = 0; + if (BM_elem_flag_test(f, BM_ELEM_SMOOTH)) { + flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH; + } + if (BM_elem_flag_test(f, BM_ELEM_SELECT)) { + flag |= SUBDIV_COARSE_FACE_FLAG_SELECT; + } + if (f == bm->act_face) { + flag |= SUBDIV_COARSE_FACE_FLAG_ACTIVE; + } + const int loopstart = BM_elem_index_get(f->l_first); + flags_data[index] = (uint)(loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + } + } + else { + for (int i = 0; i < mesh->totpoly; i++) { + uint32_t flag = 0; + if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) { + flag = SUBDIV_COARSE_FACE_FLAG_SMOOTH; + } + flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + } + } + + /* Make sure updated data is re-uploaded. */ + GPU_vertbuf_tag_dirty(cache->extra_coarse_face_data); +} + +static DRWSubdivCache *mesh_batch_cache_ensure_subdiv_cache(MeshBatchCache *mbc) +{ + DRWSubdivCache *subdiv_cache = mbc->subdiv_cache; + if (subdiv_cache == nullptr) { + subdiv_cache = static_cast<DRWSubdivCache *>( + MEM_callocN(sizeof(DRWSubdivCache), "DRWSubdivCache")); + } + mbc->subdiv_cache = subdiv_cache; + return subdiv_cache; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Subdivision grid traversal. + * + * Traverse the uniform subdivision grid over coarse faces and gather useful information for + * building the draw buffers on the GPU. We primarily gather the patch coordinates for all + * subdivision faces, as well as the original coarse indices for each subdivision element (vertex, + * face, or edge) which directly maps to its coarse counterpart (note that all subdivision faces + * map to a coarse face). This information will then be cached in #DRWSubdivCache for subsequent + * reevaluations, as long as the topology does not change. 
+ * \{ */ + +typedef struct DRWCacheBuildingContext { + const Mesh *coarse_mesh; + const SubdivToMeshSettings *settings; + + DRWSubdivCache *cache; + + /* Pointers into DRWSubdivCache buffers for easier access during traversal. */ + CompressedPatchCoord *patch_coords; + int *subdiv_loop_vert_index; + int *subdiv_loop_subdiv_vert_index; + int *subdiv_loop_edge_index; + int *subdiv_loop_poly_index; + int *point_indices; + + /* Temporary buffers used during traversal. */ + int *vert_origindex_map; + int *edge_origindex_map; + + /* Origindex layers from the mesh to directly look up during traversal the origindex from the + * base mesh for edit data so that we do not have to handle yet another GPU buffer and do this in + * the shaders. */ + int *v_origindex; + int *e_origindex; +} DRWCacheBuildingContext; + +static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context, + const int num_vertices, + const int num_edges, + const int num_loops, + const int num_polygons, + const int *subdiv_polygon_offset) +{ + if (num_loops == 0) { + return false; + } + + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + DRWSubdivCache *cache = ctx->cache; + + /* Set topology information. */ + cache->num_subdiv_edges = (uint)num_edges; + cache->num_subdiv_loops = (uint)num_loops; + cache->num_subdiv_verts = (uint)num_vertices; + cache->num_subdiv_quads = (uint)num_polygons; + cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset)); + + /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after + * it was sent to the device, since we may use the data while building other buffers on the CPU + * side. 
*/ + cache->patch_coords = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->patch_coords, get_blender_patch_coords_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->patch_coords, cache->num_subdiv_loops); + + cache->verts_orig_index = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->verts_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->verts_orig_index, cache->num_subdiv_loops); + + cache->edges_orig_index = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->edges_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->edges_orig_index, cache->num_subdiv_loops); + + cache->subdiv_loop_subdiv_vert_index = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_subdiv_vert_index")); + + cache->subdiv_loop_poly_index = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_poly_index")); + + cache->point_indices = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "point_indices")); + for (int i = 0; i < num_vertices; i++) { + cache->point_indices[i] = -1; + } + + /* Initialize context pointers and temporary buffers. 
*/ + ctx->patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(cache->patch_coords); + ctx->subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(cache->verts_orig_index); + ctx->subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(cache->edges_orig_index); + ctx->subdiv_loop_subdiv_vert_index = cache->subdiv_loop_subdiv_vert_index; + ctx->subdiv_loop_poly_index = cache->subdiv_loop_poly_index; + ctx->point_indices = cache->point_indices; + + ctx->v_origindex = static_cast<int *>( + CustomData_get_layer(&ctx->coarse_mesh->vdata, CD_ORIGINDEX)); + + ctx->e_origindex = static_cast<int *>( + CustomData_get_layer(&ctx->coarse_mesh->edata, CD_ORIGINDEX)); + + ctx->vert_origindex_map = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map")); + for (int i = 0; i < num_vertices; i++) { + ctx->vert_origindex_map[i] = -1; + } + + ctx->edge_origindex_map = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_edges * sizeof(int), "subdiv_edge_origindex_map")); + for (int i = 0; i < num_edges; i++) { + ctx->edge_origindex_map[i] = -1; + } + + return true; +} + +static void draw_subdiv_vertex_corner_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls), + const int UNUSED(ptex_face_index), + const float UNUSED(u), + const float UNUSED(v), + const int coarse_vertex_index, + const int UNUSED(coarse_poly_index), + const int UNUSED(coarse_corner), + const int subdiv_vertex_index) +{ + BLI_assert(coarse_vertex_index != ORIGINDEX_NONE); + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + ctx->vert_origindex_map[subdiv_vertex_index] = coarse_vertex_index; +} + +static void draw_subdiv_vertex_edge_cb(const SubdivForeachContext *UNUSED(foreach_context), + void *UNUSED(tls_v), + const int UNUSED(ptex_face_index), + const float UNUSED(u), + const float UNUSED(v), + const int UNUSED(coarse_edge_index), + const int UNUSED(coarse_poly_index), + const int UNUSED(coarse_corner), + const int 
UNUSED(subdiv_vertex_index)) +{ + /* Required if SubdivForeachContext.vertex_corner is also set. */ +} + +static void draw_subdiv_edge_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls), + const int coarse_edge_index, + const int subdiv_edge_index, + const int UNUSED(subdiv_v1), + const int UNUSED(subdiv_v2)) +{ + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + + int coarse_index = coarse_edge_index; + + if (coarse_index != -1) { + if (ctx->e_origindex) { + coarse_index = ctx->e_origindex[coarse_index]; + } + } + + ctx->edge_origindex_map[subdiv_edge_index] = coarse_index; +} + +static void draw_subdiv_loop_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls_v), + const int ptex_face_index, + const float u, + const float v, + const int UNUSED(coarse_loop_index), + const int coarse_poly_index, + const int UNUSED(coarse_corner), + const int subdiv_loop_index, + const int subdiv_vertex_index, + const int subdiv_edge_index) +{ + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + ctx->patch_coords[subdiv_loop_index] = make_patch_coord(ptex_face_index, u, v); + + int coarse_vertex_index = ctx->vert_origindex_map[subdiv_vertex_index]; + + if (coarse_vertex_index != -1) { + if (ctx->v_origindex) { + coarse_vertex_index = ctx->v_origindex[coarse_vertex_index]; + } + + /* Double check as vorigindex may have modified the index. */ + if (coarse_vertex_index != -1) { + ctx->point_indices[coarse_vertex_index] = subdiv_loop_index; + } + } + + ctx->subdiv_loop_subdiv_vert_index[subdiv_loop_index] = subdiv_vertex_index; + /* For now index the subdiv_edge_index, it will be replaced by the actual coarse edge index + * at the end of the traversal as some edges are only then traversed. 
*/ + ctx->subdiv_loop_edge_index[subdiv_loop_index] = subdiv_edge_index; + ctx->subdiv_loop_poly_index[subdiv_loop_index] = coarse_poly_index; + ctx->subdiv_loop_vert_index[subdiv_loop_index] = coarse_vertex_index; +} + +static void draw_subdiv_foreach_callbacks(SubdivForeachContext *foreach_context) +{ + memset(foreach_context, 0, sizeof(*foreach_context)); + foreach_context->topology_info = draw_subdiv_topology_info_cb; + foreach_context->loop = draw_subdiv_loop_cb; + foreach_context->edge = draw_subdiv_edge_cb; + foreach_context->vertex_corner = draw_subdiv_vertex_corner_cb; + foreach_context->vertex_edge = draw_subdiv_vertex_edge_cb; +} + +static void do_subdiv_traversal(DRWCacheBuildingContext *cache_building_context, Subdiv *subdiv) +{ + SubdivForeachContext foreach_context; + draw_subdiv_foreach_callbacks(&foreach_context); + foreach_context.user_data = cache_building_context; + + BKE_subdiv_foreach_subdiv_geometry(subdiv, + &foreach_context, + cache_building_context->settings, + cache_building_context->coarse_mesh); + + /* Now that traversal is done, we can set up the right original indices for the loop-to-edge map. + */ + for (int i = 0; i < cache_building_context->cache->num_subdiv_loops; i++) { + cache_building_context->subdiv_loop_edge_index[i] = + cache_building_context + ->edge_origindex_map[cache_building_context->subdiv_loop_edge_index[i]]; + } +} + +static GPUVertBuf *gpu_vertbuf_create_from_format(GPUVertFormat *format, uint len) +{ + GPUVertBuf *verts = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(verts, format); + GPU_vertbuf_data_alloc(verts, len); + return verts; +} + +/* Build maps to hold enough information to tell which face is adjacent to which vertex; those will + * be used for computing normals if limit surfaces are unavailable. 
*/ +static void build_vertex_face_adjacency_maps(DRWSubdivCache *cache) +{ + /* +1 so that we do not require a special case for the last vertex, this extra offset will + * contain the total number of adjacent faces. */ + cache->subdiv_vertex_face_adjacency_offsets = gpu_vertbuf_create_from_format( + get_origindex_format(), cache->num_subdiv_verts + 1); + + int *vertex_offsets = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency_offsets); + memset(vertex_offsets, 0, sizeof(int) * cache->num_subdiv_verts + 1); + + for (int i = 0; i < cache->num_subdiv_loops; i++) { + vertex_offsets[cache->subdiv_loop_subdiv_vert_index[i]]++; + } + + int ofs = vertex_offsets[0]; + vertex_offsets[0] = 0; + for (uint i = 1; i < cache->num_subdiv_verts + 1; i++) { + int tmp = vertex_offsets[i]; + vertex_offsets[i] = ofs; + ofs += tmp; + } + + cache->subdiv_vertex_face_adjacency = gpu_vertbuf_create_from_format(get_origindex_format(), + cache->num_subdiv_loops); + int *adjacent_faces = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency); + int *tmp_set_faces = static_cast<int *>( + MEM_callocN(sizeof(int) * cache->num_subdiv_verts, "tmp subdiv vertex offset")); + + for (int i = 0; i < cache->num_subdiv_loops / 4; i++) { + for (int j = 0; j < 4; j++) { + const int subdiv_vertex = cache->subdiv_loop_subdiv_vert_index[i * 4 + j]; + int first_face_offset = vertex_offsets[subdiv_vertex] + tmp_set_faces[subdiv_vertex]; + adjacent_faces[first_face_offset] = i; + tmp_set_faces[subdiv_vertex] += 1; + } + } + + MEM_freeN(tmp_set_faces); +} + +static bool draw_subdiv_build_cache(DRWSubdivCache *cache, + Subdiv *subdiv, + Mesh *mesh_eval, + const Scene *scene, + const SubsurfModifierData *smd, + const bool is_final_render) +{ + const int level = get_render_subsurf_level(&scene->r, smd->levels, is_final_render); + SubdivToMeshSettings to_mesh_settings; + to_mesh_settings.resolution = (1 << level) + 1; + to_mesh_settings.use_optimal_display = false; + + if (cache->resolution 
!= to_mesh_settings.resolution) { + /* Resolution changed, we need to rebuild, free any existing cached data. */ + draw_subdiv_cache_free(cache); + } + + /* If the resolution between the cache and the settings match for some reason, check if the patch + * coordinates were not already generated. Those coordinates are specific to the resolution, so + * they should be null either after initialization, or after freeing if the resolution (or some + * other subdivision setting) changed. + */ + if (cache->patch_coords != nullptr) { + return true; + } + + DRWCacheBuildingContext cache_building_context; + cache_building_context.coarse_mesh = mesh_eval; + cache_building_context.settings = &to_mesh_settings; + cache_building_context.cache = cache; + + do_subdiv_traversal(&cache_building_context, subdiv); + if (cache->num_subdiv_loops == 0) { + /* Either the traversal failed, or we have an empty mesh, either way we cannot go any further. + * The subdiv_polygon_offset cannot then be reliably stored in the cache, so free it directly. + */ + MEM_SAFE_FREE(cache->subdiv_polygon_offset); + return false; + } + + /* Build buffers for the PatchMap. */ + draw_patch_map_build(&cache->gpu_patch_map, subdiv); + + cache->face_ptex_offset = BKE_subdiv_face_ptex_offset_get(subdiv); + + // Build patch coordinates for all the face dots + cache->fdots_patch_coords = gpu_vertbuf_create_from_format(get_blender_patch_coords_format(), + mesh_eval->totpoly); + CompressedPatchCoord *blender_fdots_patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data( + cache->fdots_patch_coords); + for (int i = 0; i < mesh_eval->totpoly; i++) { + const int ptex_face_index = cache->face_ptex_offset[i]; + if (mesh_eval->mpoly[i].totloop == 4) { + /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. 
*/ + blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f); + } + else { + /* For N-gons, since they are split into quads from the center, and since the center is + * chosen to be the top right corner of each quad, the center coordinate of the coarse face + * is any one of those top right corners with `u = v = 1.0`. */ + blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 1.0f, 1.0f); + } + } + + cache->resolution = to_mesh_settings.resolution; + + cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer( + cache->subdiv_polygon_offset, mesh_eval->totpoly); + + cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset, + mesh_eval->totpoly + 1); + cache->num_coarse_poly = mesh_eval->totpoly; + cache->point_indices = cache_building_context.point_indices; + + build_vertex_face_adjacency_maps(cache); + + /* Cleanup. */ + MEM_freeN(cache_building_context.vert_origindex_map); + MEM_freeN(cache_building_context.edge_origindex_map); + + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivUboStorage. + * + * Common uniforms for the various shaders. + * \{ */ + +typedef struct DRWSubdivUboStorage { + /* Offsets in the buffers data where the source and destination data start. */ + int src_offset; + int dst_offset; + + /* Parameters for the DRWPatchMap. */ + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; + + /* Coarse topology information. */ + int coarse_poly_count; + uint edge_loose_offset; + + /* Refined topology information. */ + uint num_subdiv_loops; + + /* Subdivision settings, is int in C but bool in the GLSL code, as there, bools have the same + * size as ints, so we should use int in C to ensure that the size of the structure is what GLSL + * expects. */ + int optimal_display; + + /* The sculpt mask data layer may be null. 
*/ + int has_sculpt_mask; + + /* Masks for the extra coarse face data. */ + uint coarse_face_select_mask; + uint coarse_face_smooth_mask; + uint coarse_face_active_mask; + uint coarse_face_loopstart_mask; + + /* Number of elements to process in the compute shader (can be the coarse quad count, or the + * final vertex count, depending on which compute pass we do). This is used to early out in case + * of out of bond accesses as compute dispatch are of fixed size. */ + uint total_dispatch_size; +} DRWSubdivUboStorage; + +static_assert((sizeof(DRWSubdivUboStorage) % 16) == 0, + "DRWSubdivUboStorage is not padded to a multiple of the size of vec4"); + +static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache, + DRWSubdivUboStorage *ubo, + const int src_offset, + const int dst_offset, + const uint total_dispatch_size, + const bool has_sculpt_mask) +{ + ubo->src_offset = src_offset; + ubo->dst_offset = dst_offset; + ubo->min_patch_face = cache->gpu_patch_map.min_patch_face; + ubo->max_patch_face = cache->gpu_patch_map.max_patch_face; + ubo->max_depth = cache->gpu_patch_map.max_depth; + ubo->patches_are_triangular = cache->gpu_patch_map.patches_are_triangular; + ubo->coarse_poly_count = cache->num_coarse_poly; + ubo->optimal_display = cache->optimal_display; + ubo->num_subdiv_loops = cache->num_subdiv_loops; + ubo->edge_loose_offset = cache->num_subdiv_loops * 2; + ubo->has_sculpt_mask = has_sculpt_mask; + ubo->coarse_face_smooth_mask = SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK; + ubo->coarse_face_select_mask = SUBDIV_COARSE_FACE_FLAG_SELECT_MASK; + ubo->coarse_face_active_mask = SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK; + ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK; + ubo->total_dispatch_size = total_dispatch_size; +} + +static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache, + GPUShader *shader, + const int src_offset, + const int dst_offset, + const uint total_dispatch_size, + const bool has_sculpt_mask = false) +{ + 
DRWSubdivUboStorage storage; + draw_subdiv_init_ubo_storage( + cache, &storage, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask); + + if (!cache->ubo) { + const_cast<DRWSubdivCache *>(cache)->ubo = GPU_uniformbuf_create_ex( + sizeof(DRWSubdivUboStorage), &storage, "DRWSubdivUboStorage"); + } + + GPU_uniformbuf_update(cache->ubo, &storage); + + const int location = GPU_shader_get_uniform_block(shader, "shader_data"); + GPU_uniformbuf_bind(cache->ubo, location); +} + +/** \} */ + +// -------------------------------------------------------- + +#define SUBDIV_LOCAL_WORK_GROUP_SIZE 64 +static uint get_dispatch_size(uint elements) +{ + return divide_ceil_u(elements, SUBDIV_LOCAL_WORK_GROUP_SIZE); +} + +/* Helper to ensure that the UBO is always initalized before dispatching computes and that the same + * number of elements that need to be processed is used for the UBO and the dispatch size. + * Use this instead of a raw call to #GPU_compute_dispatch. */ +static void drw_subdiv_compute_dispatch(const DRWSubdivCache *cache, + GPUShader *shader, + const int src_offset, + const int dst_offset, + uint total_dispatch_size, + const bool has_sculpt_mask = false) +{ + const uint max_res_x = static_cast<uint>(GPU_max_work_group_count(0)); + + const uint dispatch_size = get_dispatch_size(total_dispatch_size); + uint dispatch_rx = dispatch_size; + uint dispatch_ry = 1u; + if (dispatch_rx > max_res_x) { + /* Since there are some limitations with regards to the maximum work group size (could be as + * low as 64k elements per call), we split the number elements into a "2d" number, with the + * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum + * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements + * total, which should be enough. If not, we could also use the 3rd dimension. 
*/ + /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and + * get the smallest rect fitting the requirements. */ + dispatch_rx = dispatch_ry = ceilf(sqrtf(dispatch_size)); + /* Avoid a completely empty dispatch line caused by rounding. */ + if ((dispatch_rx * (dispatch_ry - 1)) >= dispatch_size) { + dispatch_ry -= 1; + } + } + + /* X and Y dimensions may have different limits so the above computation may not be right, but + * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore, + * we presume it all fits. */ + BLI_assert(dispatch_ry < static_cast<uint>(GPU_max_work_group_count(1))); + + draw_subdiv_ubo_update_and_bind( + cache, shader, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask); + + GPU_compute_dispatch(shader, dispatch_rx, dispatch_ry, 1); +} + +void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + const bool do_limit_normals) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, + get_subdiv_vertex_format()); + evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + 
evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader( + do_limit_normals ? SHADER_PATCH_EVALUATION_LIMIT_NORMALS : SHADER_PATCH_EVALUATION); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(pos_nor, 8); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_extract_uvs(const DRWSubdivCache *cache, + GPUVertBuf *uvs, + const int face_varying_channel, + const int dst_offset) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, get_uvs_format()); + evaluator->wrapFVarSrcBuffer(evaluator, face_varying_channel, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + evaluator->fillFVarPatchArraysBuffer( + evaluator, face_varying_channel, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapFVarPatchIndexBuffer( + evaluator, face_varying_channel, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + evaluator->wrapFVarPatchParamBuffer( + evaluator, face_varying_channel, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FVAR); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + 
GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(uvs, 8); + + /* The buffer offset has the stride baked in (which is 2 as we have UVs) so remove the stride by + * dividing by 2 */ + const int src_offset = src_buffer_interface.buffer_offset / 2; + drw_subdiv_compute_dispatch(cache, shader, src_offset, dst_offset, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. + * Since it may also be used for computing UV stretches, we also need a barrier on the shader + * storage. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_SHADER_STORAGE); + + /* Cleanup. */ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, + GPUVertBuf *src_data, + GPUVertBuf *dst_data, + int dimensions, + int dst_offset) +{ + GPUShader *shader = nullptr; + + if (dimensions == 1) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 1\n"); + } + else if (dimensions == 2) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_2D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 2\n"); + } + else if (dimensions == 3) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_3D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 3\n"); + } + else if (dimensions == 4) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_4D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 4\n" + "#define GPU_FETCH_U16_TO_FLOAT\n"); + } + else { + /* Crash if dimensions are not supported. */ + } + + GPU_shader_bind(shader); + + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. 
*/ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + GPU_vertbuf_bind_as_ssbo(src_data, 1); + GPU_vertbuf_bind_as_ssbo(cache->face_ptex_offset_buffer, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 4); + GPU_vertbuf_bind_as_ssbo(dst_data, 5); + + drw_subdiv_compute_dispatch(cache, shader, 0, dst_offset, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache, + GPUVertBuf *mask_vbo, + GPUVertBuf *face_set_vbo, + GPUVertBuf *sculpt_data) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_SCULPT_DATA, nullptr); + GPU_shader_bind(shader); + + if (mask_vbo) { + GPU_vertbuf_bind_as_ssbo(mask_vbo, 0); + } + + GPU_vertbuf_bind_as_ssbo(face_set_vbo, 1); + GPU_vertbuf_bind_as_ssbo(sculpt_data, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads, mask_vbo != nullptr); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *face_adjacency_offsets, + GPUVertBuf *face_adjacency_lists, + GPUVertBuf *vertex_normals) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_ACCUMULATE, nullptr); + GPU_shader_bind(shader); + + int binding_point = 0; + + GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++); + GPU_vertbuf_bind_as_ssbo(face_adjacency_offsets, binding_point++); + GPU_vertbuf_bind_as_ssbo(face_adjacency_lists, binding_point++); + GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_verts); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_finalize_normals(const DRWSubdivCache *cache, + GPUVertBuf *vertex_normals, + GPUVertBuf *subdiv_loop_subdiv_vert_index, + GPUVertBuf *pos_nor) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_FINALIZE, nullptr); + GPU_shader_bind(shader); + + int binding_point = 0; + GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++); + GPU_vertbuf_bind_as_ssbo(subdiv_loop_subdiv_vert_index, binding_point++); + GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache, + GPUIndexBuf *subdiv_tris, + const int material_count) +{ + const bool do_single_material = material_count <= 1; + + const char *defines = "#define SUBDIV_POLYGON_OFFSET\n"; + if (do_single_material) { + defines = + "#define SUBDIV_POLYGON_OFFSET\n" + "#define SINGLE_MATERIAL\n"; + } + + GPUShader *shader = get_subdiv_shader( + do_single_material ? SHADER_BUFFER_TRIS : SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS, defines); + GPU_shader_bind(shader); + + /* Outputs */ + GPU_indexbuf_bind_as_ssbo(subdiv_tris, 1); + + if (!do_single_material) { + GPU_vertbuf_bind_as_ssbo(cache->polygon_mat_offset, 2); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + } + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache, + GPUVertBuf *fdots_pos, + GPUVertBuf *fdots_nor, + GPUIndexBuf *fdots_indices) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, + get_subdiv_vertex_format()); + evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + opensubdiv_gpu_buffer_init(&patch_arrays_buffer_interface, patch_arrays_buffer); + evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FACE_DOTS); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->fdots_patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(fdots_pos, 8); + GPU_vertbuf_bind_as_ssbo(fdots_nor, 9); + 
GPU_indexbuf_bind_as_ssbo(fdots_indices, 10); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 11); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_coarse_poly); + + /* This generates two vertex buffers and an index buffer, so we need to put a barrier on the + * vertex attributes and element arrays. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, GPUIndexBuf *lines_indices) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES, nullptr); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(cache->edges_orig_index, 0); + GPU_indexbuf_bind_as_ssbo(lines_indices, 1); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache, + GPUIndexBuf *lines_indices, + uint num_loose_edges) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES_LOOSE, "#define LINES_LOOSE\n"); + GPU_shader_bind(shader); + + GPU_indexbuf_bind_as_ssbo(lines_indices, 1); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, num_loose_edges); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *edge_idx, + GPUVertBuf *edge_fac) +{ + /* No separate shader for the AMD driver case as we assume that the GPU will not change during + * the execution of the program. */ + const char *defines = GPU_crappy_amd_driver() ? "#define GPU_AMD_DRIVER_BYTE_BUG\n" : nullptr; + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_EDGE_FAC, defines); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(pos_nor, 0); + GPU_vertbuf_bind_as_ssbo(edge_idx, 1); + GPU_vertbuf_bind_as_ssbo(edge_fac, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *lnor) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LNOR, "#define SUBDIV_POLYGON_OFFSET\n"); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(pos_nor, 1); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 2); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(lnor, 3); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache, + GPUVertBuf *coarse_data, + GPUVertBuf *subdiv_data) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_AREA, + "#define SUBDIV_POLYGON_OFFSET\n"); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(coarse_data, 1); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(subdiv_data, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *uvs, + int uvs_offset, + GPUVertBuf *stretch_angles) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_ANGLE, nullptr); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(pos_nor, 0); + GPU_vertbuf_bind_as_ssbo(uvs, 1); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(stretch_angles, 2); + + drw_subdiv_compute_dispatch(cache, shader, uvs_offset, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +/* -------------------------------------------------------------------- */ + +void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache, + MeshRenderData *mr, + const ToolSettings *toolsettings) +{ + Mesh *mesh = cache->mesh; + + /* Setup required data for loose geometry. 
*/ + mr->me = mesh; + mr->medge = mesh->medge; + mr->mvert = mesh->mvert; + mr->mpoly = mesh->mpoly; + mr->mloop = mesh->mloop; + mr->vert_len = mesh->totvert; + mr->edge_len = mesh->totedge; + mr->poly_len = mesh->totpoly; + mr->loop_len = mesh->totloop; + mr->extract_type = MR_EXTRACT_MESH; + + /* MeshRenderData is only used for generating edit mode data here. */ + if (!cache->bm) { + return; + } + + BMesh *bm = cache->bm; + BM_mesh_elem_table_ensure(bm, BM_EDGE | BM_FACE | BM_VERT); + + mr->bm = bm; + mr->toolsettings = toolsettings; + mr->eed_act = BM_mesh_active_edge_get(bm); + mr->efa_act = BM_mesh_active_face_get(bm, false, true); + mr->eve_act = BM_mesh_active_vert_get(bm); + mr->crease_ofs = CustomData_get_offset(&bm->edata, CD_CREASE); + mr->bweight_ofs = CustomData_get_offset(&bm->edata, CD_BWEIGHT); +#ifdef WITH_FREESTYLE + mr->freestyle_edge_ofs = CustomData_get_offset(&bm->edata, CD_FREESTYLE_EDGE); + mr->freestyle_face_ofs = CustomData_get_offset(&bm->pdata, CD_FREESTYLE_FACE); +#endif + mr->v_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); + mr->e_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); + mr->p_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); +} + +/** + * For material assignments we want indices for triangles that share a common material to be laid + * out contiguously in memory. To achieve this, we sort the indices based on which material the + * coarse polygon was assigned. The sort is performed by offsetting the loops indices so that they + * are directly assigned to the right sorted indices. 
+ * + * \code{.unparsed} + * Here is a visual representation, considering four quads: + * +---------+---------+---------+---------+ + * | 3 2 | 7 6 | 11 10 | 15 14 | + * | | | | | + * | 0 1 | 4 5 | 8 9 | 12 13 | + * +---------+---------+---------+---------+ + * + * If the first and third quads have the same material, we should have: + * +---------+---------+---------+---------+ + * | 3 2 | 11 10 | 7 6 | 15 14 | + * | | | | | + * | 0 1 | 8 9 | 4 5 | 12 13 | + * +---------+---------+---------+---------+ + * + * So the offsets would be: + * +---------+---------+---------+---------+ + * | 0 0 | 4 4 | -4 -4 | 0 0 | + * | | | | | + * | 0 0 | 4 4 | -4 -4 | 0 0 | + * +---------+---------+---------+---------+ + * \endcode + * + * The offsets are computed not based on the loops indices, but on the number of subdivided + * polygons for each coarse polygon. We then only store a single offset for each coarse polygon, + * since all sub-faces are contiguous, they all share the same offset. + */ +static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, + Mesh *mesh_eval, + uint mat_len) +{ + draw_subdiv_cache_free_material_data(cache); + + const int number_of_quads = cache->num_subdiv_loops / 4; + + if (mat_len == 1) { + cache->mat_start = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end")); + cache->mat_end = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end")); + cache->mat_start[0] = 0; + cache->mat_end[0] = number_of_quads; + return; + } + + /* Count number of subdivided polygons for each material. */ + int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start")); + int *subdiv_polygon_offset = cache->subdiv_polygon_offset; + + // TODO: parallel_reduce? + for (int i = 0; i < mesh_eval->totpoly; i++) { + const MPoly *mpoly = &mesh_eval->mpoly[i]; + const int next_offset = (i == mesh_eval->totpoly - 1) ? 
number_of_quads : + subdiv_polygon_offset[i + 1]; + const int quad_count = next_offset - subdiv_polygon_offset[i]; + const int mat_index = mpoly->mat_nr; + mat_start[mat_index] += quad_count; + } + + /* Accumulate offsets. */ + int ofs = mat_start[0]; + mat_start[0] = 0; + for (uint i = 1; i < mat_len; i++) { + int tmp = mat_start[i]; + mat_start[i] = ofs; + ofs += tmp; + } + + /* Compute per polygon offsets. */ + int *mat_end = static_cast<int *>(MEM_dupallocN(mat_start)); + int *per_polygon_mat_offset = static_cast<int *>( + MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset")); + + for (int i = 0; i < mesh_eval->totpoly; i++) { + const MPoly *mpoly = &mesh_eval->mpoly[i]; + const int mat_index = mpoly->mat_nr; + const int single_material_index = subdiv_polygon_offset[i]; + const int material_offset = mat_end[mat_index]; + const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads : + subdiv_polygon_offset[i + 1]; + const int quad_count = next_offset - subdiv_polygon_offset[i]; + mat_end[mat_index] += quad_count; + + per_polygon_mat_offset[i] = material_offset - single_material_index; + } + + cache->polygon_mat_offset = draw_subdiv_build_origindex_buffer(per_polygon_mat_offset, + mesh_eval->totpoly); + cache->mat_start = mat_start; + cache->mat_end = mat_end; + + MEM_freeN(per_polygon_mat_offset); +} + +static bool draw_subdiv_create_requested_buffers(const Scene *scene, + Object *ob, + Mesh *mesh, + struct MeshBatchCache *batch_cache, + MeshBufferCache *mbc, + const ToolSettings *toolsettings, + OpenSubdiv_EvaluatorCache *evaluator_cache) +{ + SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob); + BLI_assert(smd); + + const bool is_final_render = DRW_state_is_scene_render(); + + SubdivSettings settings; + BKE_subsurf_modifier_subdiv_settings_init(&settings, smd, is_final_render); + + if (settings.level == 0) { + return false; + } + + Mesh *mesh_eval = mesh; + BMesh *bm = nullptr; + if (mesh->edit_mesh) { + 
mesh_eval = mesh->edit_mesh->mesh_eval_final; + bm = mesh->edit_mesh->bm; + } + + BKE_subsurf_modifier_ensure_runtime(smd); + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &settings, mesh_eval, true); + if (!subdiv) { + return false; + } + + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) { + return false; + } + + DRWSubdivCache *draw_cache = mesh_batch_cache_ensure_subdiv_cache(batch_cache); + if (!draw_subdiv_build_cache(draw_cache, subdiv, mesh_eval, scene, smd, is_final_render)) { + return false; + } + + const bool optimal_display = (smd->flags & eSubsurfModifierFlag_ControlEdges); + + draw_cache->bm = bm; + draw_cache->mesh = mesh_eval; + draw_cache->subdiv = subdiv; + draw_cache->optimal_display = optimal_display; + draw_cache->num_subdiv_triangles = tris_count_from_number_of_loops(draw_cache->num_subdiv_loops); + /* We can only evaluate limit normals if the patches are adaptive. */ + draw_cache->do_limit_normals = settings.is_adaptive; + + if (DRW_ibo_requested(mbc->buff.ibo.tris)) { + draw_subdiv_cache_ensure_mat_offsets(draw_cache, mesh_eval, batch_cache->mat_len); + } + + draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval); + + mesh_buffer_cache_create_requested_subdiv(batch_cache, mbc, draw_cache, toolsettings); + + return true; +} + +static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr; + +void DRW_create_subdivision(const Scene *scene, + Object *ob, + Mesh *mesh, + struct MeshBatchCache *batch_cache, + MeshBufferCache *mbc, + const ToolSettings *toolsettings) +{ + if (g_evaluator_cache == nullptr) { + g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE); + } + +#undef TIME_SUBDIV + +#ifdef TIME_SUBDIV + const double begin_time = PIL_check_seconds_timer(); +#endif + + if (!draw_subdiv_create_requested_buffers( + scene, ob, mesh, batch_cache, mbc, toolsettings, g_evaluator_cache)) { + return; + } 
+ +#ifdef TIME_SUBDIV + const double end_time = PIL_check_seconds_timer(); + fprintf(stderr, "Time to update subdivision: %f\n", end_time - begin_time); + fprintf(stderr, "Maximum FPS: %f\n", 1.0 / (end_time - begin_time)); +#endif +} + +void DRW_subdiv_free() +{ + for (int i = 0; i < NUM_SHADERS; ++i) { + GPU_shader_free(g_subdiv_shaders[i]); + } + + DRW_cache_free_old_subdiv(); + + if (g_evaluator_cache) { + openSubdiv_deleteEvaluatorCache(g_evaluator_cache); + g_evaluator_cache = nullptr; + } +} + +static LinkNode *gpu_subdiv_free_queue = nullptr; +static ThreadMutex gpu_subdiv_queue_mutex = BLI_MUTEX_INITIALIZER; + +void DRW_subdiv_cache_free(Subdiv *subdiv) +{ + BLI_mutex_lock(&gpu_subdiv_queue_mutex); + BLI_linklist_prepend(&gpu_subdiv_free_queue, subdiv); + BLI_mutex_unlock(&gpu_subdiv_queue_mutex); +} + +void DRW_cache_free_old_subdiv() +{ + if (gpu_subdiv_free_queue == nullptr) { + return; + } + + BLI_mutex_lock(&gpu_subdiv_queue_mutex); + + while (gpu_subdiv_free_queue != nullptr) { + Subdiv *subdiv = static_cast<Subdiv *>(BLI_linklist_pop(&gpu_subdiv_free_queue)); + /* Set the type to CPU so that we do actually free the cache. 
*/ + subdiv->evaluator->type = OPENSUBDIV_EVALUATOR_CPU; + BKE_subdiv_free(subdiv); + } + + BLI_mutex_unlock(&gpu_subdiv_queue_mutex); +} diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index 930fb6eabef..0bf6468f7cc 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -52,6 +52,7 @@ #include "BKE_pointcache.h" #include "BKE_pointcloud.h" #include "BKE_screen.h" +#include "BKE_subdiv_modifier.h" #include "BKE_volume.h" #include "DNA_camera_types.h" @@ -90,6 +91,7 @@ #include "draw_manager_testing.h" #include "draw_manager_text.h" #include "draw_shader.h" +#include "draw_subdivision.h" #include "draw_texture_pool.h" /* only for callbacks */ @@ -2975,6 +2977,8 @@ void DRW_engines_register(void) BKE_volume_batch_cache_dirty_tag_cb = DRW_volume_batch_cache_dirty_tag; BKE_volume_batch_cache_free_cb = DRW_volume_batch_cache_free; + + BKE_subsurf_modifier_free_gpu_cache_cb = DRW_subdiv_cache_free; } } diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h new file mode 100644 index 00000000000..f60ec7afc77 --- /dev/null +++ b/source/blender/draw/intern/draw_subdivision.h @@ -0,0 +1,231 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
 + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "BLI_sys_types.h" + +struct BMesh; +struct GPUIndexBuf; +struct GPUUniformBuf; +struct GPUVertBuf; +struct Mesh; +struct MeshBatchCache; +struct MeshBufferCache; +struct MeshRenderData; +struct Object; +struct Scene; +struct Subdiv; +struct ToolSettings; + +/* -------------------------------------------------------------------- */ +/** \name DRWPatchMap + * + * This is a GPU version of the OpenSubDiv PatchMap. The quad tree and the patch handles are copied + * to GPU buffers in order to lookup the right patch for a given set of patch coordinates. + * \{ */ + +typedef struct DRWPatchMap { + struct GPUVertBuf *patch_map_handles; + struct GPUVertBuf *patch_map_quadtree; + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; +} DRWPatchMap; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivCache + * + * This holds the various buffers used to evaluate and render subdivision through OpenGL. + * \{ */ + +typedef struct DRWSubdivCache { + struct Mesh *mesh; + struct BMesh *bm; + struct Subdiv *subdiv; + bool optimal_display; + bool do_limit_normals; + + /* Coordinates used to evaluate patches for UVs, positions, and normals. */ + struct GPUVertBuf *patch_coords; + /* Coordinates used to evaluate patches for the face centers (or face dots) in edit-mode. */ + struct GPUVertBuf *fdots_patch_coords; + + /* Resolution used to generate the patch coordinates. */ + int resolution; + + /* Number of subdivided loops, also the number of patch coordinates since we have one coordinate + * per quad corner/vertex. */ + uint num_subdiv_loops; + uint num_subdiv_edges; + uint num_subdiv_triangles; + uint num_subdiv_verts; + uint num_subdiv_quads; + + /* Number of polygons in the coarse mesh, notably used to compute a coarse polygon index given a + * subdivision loop index. 
*/ + int num_coarse_poly; + + /* Maps subdivision loop to subdivided vertex index. */ + int *subdiv_loop_subdiv_vert_index; + /* Maps subdivision loop to original coarse poly index. */ + int *subdiv_loop_poly_index; + + /* Indices of faces adjacent to the vertices, ordered by vertex index, with no particular + * winding. */ + struct GPUVertBuf *subdiv_vertex_face_adjacency; + /* The difference between value (i + 1) and (i) gives the number of faces adjacent to vertex (i). + */ + struct GPUVertBuf *subdiv_vertex_face_adjacency_offsets; + + /* Maps subdivision loop to original coarse vertex index, only really useful for edit mode. */ + struct GPUVertBuf *verts_orig_index; + /* Maps subdivision loop to original coarse edge index, only really useful for edit mode. */ + struct GPUVertBuf *edges_orig_index; + + /* Owned by #Subdiv. Indexed by coarse polygon index, difference between value (i + 1) and (i) + * gives the number of ptex faces for coarse polygon (i). */ + int *face_ptex_offset; + /* Vertex buffer for face_ptex_offset. */ + struct GPUVertBuf *face_ptex_offset_buffer; + + int *subdiv_polygon_offset; + struct GPUVertBuf *subdiv_polygon_offset_buffer; + + /* Contains the start loop index and the smooth flag for each coarse polygon. */ + struct GPUVertBuf *extra_coarse_face_data; + + /* Computed for ibo.points, one value per subdivided vertex, mapping coarse vertices -> + * subdivided loop */ + int *point_indices; + + /* Material offsets. */ + int *mat_start; + int *mat_end; + struct GPUVertBuf *polygon_mat_offset; + + DRWPatchMap gpu_patch_map; + + /* UBO to store settings for the various compute shaders. */ + struct GPUUniformBuf *ubo; +} DRWSubdivCache; + +/* Only frees the data of the cache, caller is responsible to free the cache itself if necessary. 
+ */ +void draw_subdiv_cache_free(DRWSubdivCache *cache); + +/** \} */ + +void DRW_create_subdivision(const struct Scene *scene, + struct Object *ob, + struct Mesh *mesh, + struct MeshBatchCache *batch_cache, + struct MeshBufferCache *mbc, + const struct ToolSettings *toolsettings); + +void DRW_subdiv_cache_free(struct Subdiv *subdiv); + +void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache, + struct MeshRenderData *mr, + const struct ToolSettings *toolsettings); + +void draw_subdiv_init_origindex_buffer(struct GPUVertBuf *buffer, + int *vert_origindex, + uint num_loops, + uint loose_len); + +struct GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops); + +/* Compute shader functions. */ + +void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *mask_vbo, + struct GPUVertBuf *face_set_vbo, + struct GPUVertBuf *sculpt_data); + +void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *face_adjacency_offsets, + struct GPUVertBuf *face_adjacency_lists, + struct GPUVertBuf *vertex_normals); + +void draw_subdiv_finalize_normals(const DRWSubdivCache *cache, + struct GPUVertBuf *vertex_normals, + struct GPUVertBuf *subdiv_loop_subdiv_vert_index, + struct GPUVertBuf *pos_nor); + +void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + const bool do_limit_normals); + +void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, + struct GPUVertBuf *src_data, + struct GPUVertBuf *dst_buffer, + int dimensions, + int dst_offset); + +void draw_subdiv_extract_uvs(const DRWSubdivCache *cache, + struct GPUVertBuf *uvs, + const int face_varying_channel, + const int dst_offset); + +void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *edge_idx, + struct GPUVertBuf *edge_fac); + +void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache, + 
struct GPUIndexBuf *subdiv_tris, + const int material_count); + +void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, + struct GPUIndexBuf *lines_indices); + +void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache, + struct GPUIndexBuf *lines_indices, + uint num_loose_edges); + +void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache, + struct GPUVertBuf *fdots_pos, + struct GPUVertBuf *fdots_nor, + struct GPUIndexBuf *fdots_indices); + +void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *lnor); + +void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *coarse_data, + struct GPUVertBuf *subdiv_data); + +void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *uvs, + int uvs_offset, + struct GPUVertBuf *stretch_angles); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.h b/source/blender/draw/intern/mesh_extractors/extract_mesh.h index 7d21804c08f..35cc2cf986e 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh.h +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.h @@ -39,6 +39,8 @@ extern "C" { #endif +struct DRWSubdivCache; + #define MIN_RANGE_LEN 1024 /* ---------------------------------------------------------------------- */ @@ -203,6 +205,11 @@ typedef void(ExtractLVertMeshFn)(const MeshRenderData *mr, const MVert *mv, const int lvert_index, void *data); +typedef void(ExtractLooseGeomSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *data); typedef void(ExtractInitFn)(const MeshRenderData *mr, struct MeshBatchCache *cache, void *buffer, @@ -213,6 +220,18 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr, void *data); typedef void(ExtractTaskReduceFn)(void *userdata, 
void *task_userdata); +typedef void(ExtractInitSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *data); +typedef void(ExtractIterSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *data); +typedef void(ExtractFinishSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + void *buf, + void *data); + typedef struct MeshExtract { /** Executed on main thread and return user data for iteration functions. */ ExtractInitFn *init; @@ -225,9 +244,14 @@ typedef struct MeshExtract { ExtractLEdgeMeshFn *iter_ledge_mesh; ExtractLVertBMeshFn *iter_lvert_bm; ExtractLVertMeshFn *iter_lvert_mesh; + ExtractLooseGeomSubdivFn *iter_loose_geom_subdiv; /** Executed on one worker thread after all elements iterations. */ ExtractTaskReduceFn *task_reduce; ExtractFinishFn *finish; + /** Executed on main thread for subdivision evaluation. */ + ExtractInitSubdivFn *init_subdiv; + ExtractIterSubdivFn *iter_subdiv; + ExtractFinishSubdivFn *finish_subdiv; /** Used to request common data. 
*/ eMRDataType data_type; size_t data_size; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc index 4cc9a875f79..6a1691e8634 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ /** \name Extract Edit UV Triangles Indices @@ -94,6 +96,57 @@ static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init(&data->elb, + GPU_PRIM_TRIS, + subdiv_cache->num_subdiv_triangles, + subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_tris_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + const uint loop_idx = i * 4; + const int poly_origindex = subdiv_loop_poly_index[loop_idx]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + edituv_tri_add(data, + BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0, + BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0, + loop_idx, + loop_idx + 1, + loop_idx + 2); + + edituv_tri_add(data, + BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0, + BM_elem_flag_test(efa, 
BM_ELEM_SELECT) != 0, + loop_idx, + loop_idx + 2, + loop_idx + 3); + } +} + +static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_tris() { MeshExtract extractor = {nullptr}; @@ -101,6 +154,9 @@ constexpr MeshExtract create_extractor_edituv_tris() extractor.iter_looptri_bm = extract_edituv_tris_iter_looptri_bm; extractor.iter_looptri_mesh = extract_edituv_tris_iter_looptri_mesh; extractor.finish = extract_edituv_tris_finish; + extractor.init_subdiv = extract_edituv_tris_init_subdiv; + extractor.iter_subdiv = extract_edituv_tris_iter_subdiv; + extractor.finish_subdiv = extract_edituv_tris_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; @@ -184,6 +240,56 @@ static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init( + &data->elb, GPU_PRIM_LINES, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_lines_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + int *subdiv_loop_edge_index = (int 
*)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + + uint start_loop_idx = i * 4; + uint end_loop_idx = (i + 1) * 4; + + const int poly_origindex = subdiv_loop_poly_index[start_loop_idx]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { + const int edge_origindex = subdiv_loop_edge_index[loop_idx]; + const bool real_edge = (edge_origindex != -1 && + mr->e_origindex[edge_origindex] != ORIGINDEX_NONE); + edituv_edge_add(data, + BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) != 0 || !real_edge, + BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) != 0, + loop_idx, + (loop_idx + 1 == end_loop_idx) ? start_loop_idx : (loop_idx + 1)); + } + } +} + +static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_lines() { MeshExtract extractor = {nullptr}; @@ -191,6 +297,9 @@ constexpr MeshExtract create_extractor_edituv_lines() extractor.iter_poly_bm = extract_edituv_lines_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_lines_iter_poly_mesh; extractor.finish = extract_edituv_lines_finish; + extractor.init_subdiv = extract_edituv_lines_init_subdiv; + extractor.iter_subdiv = extract_edituv_lines_iter_subdiv; + extractor.finish_subdiv = extract_edituv_lines_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; @@ -268,6 +377,50 @@ static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_points_init_subdiv(const DRWSubdivCache 
*subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init( + &data->elb, GPU_PRIM_POINTS, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_points_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + const bool real_vert = (mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) && + vert_origindex != -1 && + mr->v_origindex[vert_origindex] != ORIGINDEX_NONE); + edituv_point_add(data, + (BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) || !real_vert, + BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0, + i); + } +} + +static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_points() { MeshExtract extractor = {nullptr}; @@ -275,6 +428,9 @@ constexpr MeshExtract create_extractor_edituv_points() extractor.iter_poly_bm = extract_edituv_points_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_points_iter_poly_mesh; extractor.finish = 
extract_edituv_points_finish; + extractor.init_subdiv = extract_edituv_points_init_subdiv; + extractor.iter_subdiv = extract_edituv_points_iter_subdiv; + extractor.finish_subdiv = extract_edituv_points_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 54f5611106f..3d9729dea56 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -155,6 +157,33 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(elb, ibo); } +static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + GPU_indexbuf_init_build_on_device(ibo, + subdiv_cache->num_subdiv_loops * 2 + mr->edge_loose_len * 2); + + draw_subdiv_build_lines_buffer(subdiv_cache, ibo); +} + +static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + if (loose_geom->edge_len == 0) { + return; + } + + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + draw_subdiv_build_lines_loose_buffer(subdiv_cache, ibo, static_cast<uint>(loose_geom->edge_len)); +} + constexpr MeshExtract create_extractor_lines() { MeshExtract extractor = {nullptr}; @@ -163,6 +192,8 @@ constexpr MeshExtract create_extractor_lines() extractor.iter_poly_mesh = 
extract_lines_iter_poly_mesh; extractor.iter_ledge_bm = extract_lines_iter_ledge_bm; extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh; + extractor.init_subdiv = extract_lines_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_lines_loose_geom_subdiv; extractor.task_reduce = extract_lines_task_reduce; extractor.finish = extract_lines_finish; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc index e7dabfa9ee2..6855feb51ed 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc @@ -26,6 +26,7 @@ #include "MEM_guardedalloc.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -44,6 +45,18 @@ struct MeshExtract_LineAdjacency_Data { uint *vert_to_loop; }; +static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data, + uint vert_len, + uint loop_len, + uint tess_edge_len) +{ + data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * vert_len, __func__)); + + GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, loop_len); + data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len); + data->is_manifold = true; +} + static void extract_lines_adjacency_init(const MeshRenderData *mr, struct MeshBatchCache *UNUSED(cache), void *UNUSED(buf), @@ -55,11 +68,7 @@ static void extract_lines_adjacency_init(const MeshRenderData *mr, uint tess_edge_len = mr->loop_len + mr->tri_len - mr->poly_len; MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(tls_data); - data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * mr->vert_len, __func__)); - - GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, mr->loop_len); - data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len); - 
data->is_manifold = true; + line_adjacency_data_init(data, mr->vert_len, mr->loop_len, tess_edge_len); } BLI_INLINE void lines_adjacency_triangle( @@ -171,6 +180,56 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), MEM_freeN(data->vert_to_loop); } +static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *_data) +{ + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + + /* For each polygon there is (loop + triangle - 1) edges. Since we only have quads, and a quad + * is split into 2 triangles, we have (loop + 2 - 1) = (loop + 1) edges for each quad, or in + * total: (number_of_loops + number_of_quads). */ + const uint tess_len = subdiv_cache->num_subdiv_loops + subdiv_cache->num_subdiv_quads; + line_adjacency_data_init( + data, tess_len, subdiv_cache->num_subdiv_verts, subdiv_cache->num_subdiv_loops); +} + +static void extract_lines_adjacency_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + void *_data) +{ + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + const uint loop_index = i * 4; + const uint l0 = loop_index + 0; + const uint l1 = loop_index + 1; + const uint l2 = loop_index + 2; + const uint l3 = loop_index + 3; + + const uint v0 = subdiv_cache->subdiv_loop_subdiv_vert_index[l0]; + const uint v1 = subdiv_cache->subdiv_loop_subdiv_vert_index[l1]; + const uint v2 = subdiv_cache->subdiv_loop_subdiv_vert_index[l2]; + const uint v3 = subdiv_cache->subdiv_loop_subdiv_vert_index[l3]; + + lines_adjacency_triangle(v0, v1, v2, l0, l1, l2, data); + lines_adjacency_triangle(v0, v2, v3, l0, l2, l3, data); + } +} + +static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + 
void *_data) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + GPU_indexbuf_build_in_place(&data->elb, ibo); + BLI_edgehash_free(data->eh, nullptr); + MEM_freeN(data->vert_to_loop); +} + #undef NO_EDGE constexpr MeshExtract create_extractor_lines_adjacency() @@ -180,6 +239,9 @@ constexpr MeshExtract create_extractor_lines_adjacency() extractor.iter_looptri_bm = extract_lines_adjacency_iter_looptri_bm; extractor.iter_looptri_mesh = extract_lines_adjacency_iter_looptri_mesh; extractor.finish = extract_lines_adjacency_finish; + extractor.init_subdiv = extract_lines_adjacency_init_subdiv; + extractor.iter_subdiv = extract_lines_adjacency_iter_subdiv; + extractor.finish_subdiv = extract_lines_adjacency_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_LineAdjacency_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index 01e14a004ed..19167772a42 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -25,6 +25,7 @@ #include "MEM_guardedalloc.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -155,6 +156,74 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(elb, ibo); } +static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *UNUSED(cache), + void *UNUSED(buffer), + void *data) +{ + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); + /* Copy the points as the data upload will free them. 
*/ + elb->data = (uint *)MEM_dupallocN(subdiv_cache->point_indices); + elb->index_len = subdiv_cache->num_subdiv_verts; + elb->index_min = 0; + elb->index_max = subdiv_cache->num_subdiv_loops - 1; + elb->prim_type = GPU_PRIM_POINTS; +} + +static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *UNUSED(buffer), + void *data) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); + + elb->data = static_cast<uint32_t *>( + MEM_reallocN(elb->data, sizeof(uint) * (subdiv_cache->num_subdiv_loops + loop_loose_len))); + + const Mesh *coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + if (elb->data[loose_edge->v1] == -1u) { + elb->data[loose_edge->v1] = offset; + } + if (elb->data[loose_edge->v2] == -1u) { + elb->data[loose_edge->v2] = offset + 1; + } + elb->index_max += 2; + elb->index_len += 2; + offset += 2; + } + + for (int i = 0; i < loose_geom->vert_len; i++) { + if (elb->data[loose_geom->verts[i]] == -1u) { + elb->data[loose_geom->verts[i]] = offset; + } + elb->index_max += 1; + elb->index_len += 1; + offset += 1; + } +} + +static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_userdata) +{ + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(elb, ibo); +} + constexpr MeshExtract create_extractor_points() { MeshExtract extractor = {nullptr}; @@ -167,6 +236,9 @@ constexpr MeshExtract create_extractor_points() extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh; 
extractor.task_reduce = extract_points_task_reduce; extractor.finish = extract_points_finish; + extractor.init_subdiv = extract_points_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_points_loose_geom_subdiv; + extractor.finish_subdiv = extract_points_finish_subdiv; extractor.use_threading = true; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(GPUIndexBufBuilder); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 54e733d3d86..b1ace8bc6c9 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from) @@ -123,10 +125,37 @@ static void extract_tris_finish(const MeshRenderData *mr, } } +static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + /* Initialize the index buffer, it was already allocated, it will be filled on the device. */ + GPU_indexbuf_init_build_on_device(ibo, subdiv_cache->num_subdiv_triangles * 3); + + if (cache->tris_per_mat) { + for (int i = 0; i < cache->mat_len; i++) { + if (cache->tris_per_mat[i] == nullptr) { + cache->tris_per_mat[i] = GPU_indexbuf_calloc(); + } + + /* Multiply by 6 since we have 2 triangles per quad. 
*/ + const int start = subdiv_cache->mat_start[i] * 6; + const int len = (subdiv_cache->mat_end[i] - subdiv_cache->mat_start[i]) * 6; + GPU_indexbuf_create_subrange_in_place(cache->tris_per_mat[i], ibo, start, len); + } + } + + draw_subdiv_build_tris_buffer(subdiv_cache, ibo, cache->mat_len); +} + constexpr MeshExtract create_extractor_tris() { MeshExtract extractor = {nullptr}; extractor.init = extract_tris_init; + extractor.init_subdiv = extract_tris_init_subdiv; extractor.iter_poly_bm = extract_tris_iter_poly_bm; extractor.iter_poly_mesh = extract_tris_iter_poly_mesh; extractor.task_reduce = extract_tris_mat_task_reduce; @@ -214,6 +243,7 @@ constexpr MeshExtract create_extractor_tris_single_mat() { MeshExtract extractor = {nullptr}; extractor.init = extract_tris_single_mat_init; + extractor.init_subdiv = extract_tris_init_subdiv; extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm; extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh; extractor.task_reduce = extract_tris_mat_task_reduce; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc index 8a5a8134ca7..ea702e5efdd 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc @@ -32,6 +32,7 @@ #include "BKE_attribute.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -153,7 +154,9 @@ static GPUVertCompType get_comp_type_for_type(CustomDataType type) static void init_vbo_for_attribute(const MeshRenderData *mr, GPUVertBuf *vbo, - const DRW_AttributeRequest &request) + const DRW_AttributeRequest &request, + bool build_on_device, + uint32_t len) { GPUVertCompType comp_type = get_comp_type_for_type(request.cd_type); GPUVertFetchMode fetch_mode = get_fetch_mode_for_type(request.cd_type); @@ -184,8 +187,13 @@ static void 
init_vbo_for_attribute(const MeshRenderData *mr, } } - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, static_cast<uint32_t>(mr->loop_len)); + if (build_on_device) { + GPU_vertbuf_init_build_on_device(vbo, &format, len); + } + else { + GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_data_alloc(vbo, len); + } } template<typename AttributeType, typename VBOType> @@ -309,7 +317,7 @@ static void extract_attr_init(const MeshRenderData *mr, GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - init_vbo_for_attribute(mr, vbo, request); + init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); /* TODO(kevindietrich) : float3 is used for scalar attributes as the implicit conversion done by * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following the @@ -346,6 +354,68 @@ static void extract_attr_init(const MeshRenderData *mr, } } +static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *cache, + void *buffer, + void *UNUSED(tls_data), + int index) +{ + const DRW_MeshAttributes *attrs_used = &cache->attr_used; + const DRW_AttributeRequest &request = attrs_used->requests[index]; + + Mesh *coarse_mesh = subdiv_cache->mesh; + + const uint32_t dimensions = gpu_component_size_for_attribute_type(request.cd_type); + + /* Prepare VBO for coarse data. The compute shader only expects floats. 
*/ + GPUVertBuf *src_data = GPU_vertbuf_calloc(); + static GPUVertFormat coarse_format = {0}; + GPU_vertformat_attr_add(&coarse_format, "data", GPU_COMP_F32, dimensions, GPU_FETCH_FLOAT); + GPU_vertbuf_init_with_format_ex(src_data, &coarse_format, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(src_data, static_cast<uint32_t>(coarse_mesh->totloop)); + + switch (request.cd_type) { + case CD_PROP_BOOL: { + extract_attr_generic<bool, float3>(mr, src_data, request); + break; + } + case CD_PROP_INT32: { + extract_attr_generic<int32_t, float3>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT: { + extract_attr_generic<float, float3>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT2: { + extract_attr_generic<float2>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT3: { + extract_attr_generic<float3>(mr, src_data, request); + break; + } + case CD_PROP_COLOR: { + extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request); + break; + } + default: { + BLI_assert(false); + } + } + + GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); + init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); + + /* Ensure data is uploaded properly. */ + GPU_vertbuf_tag_dirty(src_data); + draw_subdiv_interp_custom_data( + subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0); + + GPU_vertbuf_discard(src_data); +} + /* Wrappers around extract_attr_init so we can pass the index of the attribute that we want to * extract. The overall API does not allow us to pass this in a convenient way. 
*/ #define EXTRACT_INIT_WRAPPER(index) \ @@ -353,6 +423,14 @@ static void extract_attr_init(const MeshRenderData *mr, const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \ { \ extract_attr_init(mr, cache, buf, tls_data, index); \ + } \ + static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \ + const MeshRenderData *mr, \ + struct MeshBatchCache *cache, \ + void *buf, \ + void *tls_data) \ + { \ + extract_attr_init_subdiv(subdiv_cache, mr, cache, buf, tls_data, index); \ } EXTRACT_INIT_WRAPPER(0) @@ -371,10 +449,12 @@ EXTRACT_INIT_WRAPPER(12) EXTRACT_INIT_WRAPPER(13) EXTRACT_INIT_WRAPPER(14) -template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn) +template<int index> +constexpr MeshExtract create_extractor_attr(ExtractInitFn fn, ExtractInitSubdivFn subdiv_fn) { MeshExtract extractor = {nullptr}; extractor.init = fn; + extractor.init_subdiv = subdiv_fn; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; @@ -388,7 +468,8 @@ template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn extern "C" { #define CREATE_EXTRACTOR_ATTR(index) \ - blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index) + blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index, \ + blender::draw::extract_attr_init_subdiv##index) const MeshExtract extract_attr[GPU_MAX_ATTR] = { CREATE_EXTRACTOR_ATTR(0), diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index 2e2444a8e3d..5ee34d7fdb2 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -25,6 +25,7 @@ #include "GPU_capabilities.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -216,6 +217,86 
@@ static void extract_edge_fac_finish(const MeshRenderData *mr, MEM_SAFE_FREE(data->edge_loop_count); } +/* Different function than the one used for the non-subdivision case, as we directly take care of + * the buggy AMD driver case. */ +static GPUVertFormat *get_subdiv_edge_fac_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + if (GPU_crappy_amd_driver()) { + GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + else { + GPU_vertformat_attr_add(&format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + } + } + return &format; +} + +static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *edge_idx = cache->final.buff.vbo.edge_idx; + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPU_vertbuf_init_build_on_device( + vbo, get_subdiv_edge_fac_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + + /* Create a temporary buffer for the edge original indices if it was not requested. 
*/ + const bool has_edge_idx = edge_idx != nullptr; + GPUVertBuf *loop_edge_idx = nullptr; + if (has_edge_idx) { + loop_edge_idx = edge_idx; + } + else { + loop_edge_idx = GPU_vertbuf_calloc(); + draw_subdiv_init_origindex_buffer( + loop_edge_idx, + static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)), + subdiv_cache->num_subdiv_loops, + 0); + } + + draw_subdiv_build_edge_fac_buffer(subdiv_cache, pos_nor, loop_edge_idx, vbo); + + if (!has_edge_idx) { + GPU_vertbuf_discard(loop_edge_idx); + } +} + +static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + if (loose_geom->edge_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + /* Make sure buffer is active for sending loose data. */ + GPU_vertbuf_use(vbo); + + uint offset = subdiv_cache->num_subdiv_loops; + for (int i = 0; i < loose_geom->edge_len; i++) { + if (GPU_crappy_amd_driver()) { + float loose_edge_fac[2] = {1.0f, 1.0f}; + GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); + } + else { + char loose_edge_fac[2] = {255, 255}; + GPU_vertbuf_update_sub(vbo, offset * sizeof(char), sizeof(loose_edge_fac), loose_edge_fac); + } + + offset += 2; + } +} + constexpr MeshExtract create_extractor_edge_fac() { MeshExtract extractor = {nullptr}; @@ -224,6 +305,8 @@ constexpr MeshExtract create_extractor_edge_fac() extractor.iter_poly_mesh = extract_edge_fac_iter_poly_mesh; extractor.iter_ledge_bm = extract_edge_fac_iter_ledge_bm; extractor.iter_ledge_mesh = extract_edge_fac_iter_ledge_mesh; + extractor.init_subdiv = extract_edge_fac_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_edge_fac_loose_geom_subdiv; extractor.finish = extract_edge_fac_finish; extractor.data_type = MR_DATA_POLY_NOR; extractor.data_size = sizeof(MeshExtract_EdgeFac_Data); diff --git 
a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc index 5232346e51e..eef64085c95 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc @@ -25,6 +25,8 @@ #include "draw_cache_impl.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -107,19 +109,25 @@ static void mesh_render_data_vert_flag(const MeshRenderData *mr, } } -static void extract_edit_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) +static GPUVertFormat *get_edit_data_format(void) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { /* WARNING: Adjust #EditLoopData struct accordingly. */ GPU_vertformat_attr_add(&format, "data", GPU_COMP_U8, 4, GPU_FETCH_INT); GPU_vertformat_alias_add(&format, "flag"); } - GPU_vertbuf_init_with_format(vbo, &format); + return &format; +} + +static void extract_edit_data_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat *format = get_edit_data_format(); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->loop_len + mr->loop_loose_len); EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); *(EditLoopData **)tls_data = vbo_data; @@ -240,6 +248,80 @@ static void extract_edit_data_iter_lvert_mesh(const MeshRenderData *mr, } } +static void extract_edit_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPU_vertbuf_init_with_format(vbo, get_edit_data_format()); + 
GPU_vertbuf_data_alloc(vbo, subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); + *(EditLoopData **)data = vbo_data; +} + +static void extract_edit_data_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + EditLoopData *vbo_data = *(EditLoopData **)_data; + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int edge_origindex = subdiv_loop_edge_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + + EditLoopData *edit_loop_data = &vbo_data[i]; + memset(edit_loop_data, 0, sizeof(EditLoopData)); + + if (vert_origindex != -1) { + const BMVert *eve = bm_original_vert_get(mr, vert_origindex); + if (eve) { + mesh_render_data_vert_flag(mr, eve, edit_loop_data); + } + } + + if (edge_origindex != -1) { + const BMEdge *eed = bm_original_edge_get(mr, edge_origindex); + if (eed) { + mesh_render_data_edge_flag(mr, eed, edit_loop_data); + } + } + + BMFace *efa = bm_original_face_get(mr, poly_origindex); + /* The -1 parameter is for edit_uvs, which we don't do here. 
*/ + mesh_render_data_face_flag(mr, efa, -1, edit_loop_data); + } +} + +static void extract_edit_data_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + const MeshExtractLooseGeom *loose_geom, + void *UNUSED(buffer), + void *_data) +{ + if (loose_geom->edge_len == 0) { + return; + } + + EditLoopData *vbo_data = *(EditLoopData **)_data; + + for (int ledge_index = 0; ledge_index < loose_geom->edge_len; ledge_index++) { + const int offset = subdiv_cache->num_subdiv_loops + ledge_index * 2; + EditLoopData *data = &vbo_data[offset]; + memset(data, 0, sizeof(EditLoopData)); + BMEdge *eed = bm_original_edge_get(mr, loose_geom->edges[ledge_index]); + mesh_render_data_edge_flag(mr, eed, &data[0]); + data[1] = data[0]; + mesh_render_data_vert_flag(mr, eed->v1, &data[0]); + mesh_render_data_vert_flag(mr, eed->v2, &data[1]); + } +} + constexpr MeshExtract create_extractor_edit_data() { MeshExtract extractor = {nullptr}; @@ -250,6 +332,9 @@ constexpr MeshExtract create_extractor_edit_data() extractor.iter_ledge_mesh = extract_edit_data_iter_ledge_mesh; extractor.iter_lvert_bm = extract_edit_data_iter_lvert_bm; extractor.iter_lvert_mesh = extract_edit_data_iter_lvert_mesh; + extractor.init_subdiv = extract_edit_data_init_subdiv; + extractor.iter_subdiv = extract_edit_data_iter_subdiv; + extractor.iter_loose_geom_subdiv = extract_edit_data_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(EditLoopData *); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc index b8494428eed..067d482bc2b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc @@ -25,6 +25,8 @@ #include "draw_cache_impl.h" +#include "draw_subdivision.h" + namespace blender::draw { /* 
---------------------------------------------------------------------- */ @@ -36,12 +38,11 @@ struct MeshExtract_EditUVData_Data { int cd_ofs; }; -static void extract_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) +static void extract_edituv_data_init_common(const MeshRenderData *mr, + GPUVertBuf *vbo, + MeshExtract_EditUVData_Data *data, + uint loop_len) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { /* WARNING: Adjust #EditLoopData struct accordingly. */ @@ -50,15 +51,23 @@ static void extract_edituv_data_init(const MeshRenderData *mr, } GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, mr->loop_len); + GPU_vertbuf_data_alloc(vbo, loop_len); CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; - - MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data); data->vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); data->cd_ofs = CustomData_get_offset(cd_ldata, CD_MLOOPUV); } +static void extract_edituv_data_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data); + extract_edituv_data_init_common(mr, vbo, data, mr->loop_len); +} + static void extract_edituv_data_iter_poly_bm(const MeshRenderData *mr, const BMFace *f, const int UNUSED(f_index), @@ -119,12 +128,54 @@ static void extract_edituv_data_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_edituv_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data 
*>(tls_data); + extract_edituv_data_init_common(mr, vbo, data, subdiv_cache->num_subdiv_loops); +} + +static void extract_edituv_data_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(_data); + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int edge_origindex = subdiv_loop_edge_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + + EditLoopData *edit_loop_data = &data->vbo_data[i]; + memset(edit_loop_data, 0, sizeof(EditLoopData)); + + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + if (vert_origindex != -1 && edge_origindex != -1) { + BMEdge *eed = bm_original_edge_get(mr, edge_origindex); + /* Loop on an edge endpoint. 
*/ + BMLoop *l = BM_face_edge_share_loop(efa, eed); + mesh_render_data_loop_flag(mr, l, data->cd_ofs, edit_loop_data); + mesh_render_data_loop_edge_flag(mr, l, data->cd_ofs, edit_loop_data); + } + } +} + constexpr MeshExtract create_extractor_edituv_data() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_data_init; extractor.iter_poly_bm = extract_edituv_data_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_data_iter_poly_mesh; + extractor.init_subdiv = extract_edituv_data_init_subdiv; + extractor.iter_subdiv = extract_edituv_data_iter_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUVData_Data); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc index a947d98f955..0ea4ef5d5db 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -213,12 +215,69 @@ static void extract_edituv_stretch_angle_iter_poly_mesh(const MeshRenderData *mr } } +static GPUVertFormat *get_edituv_stretch_angle_format_subdiv() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* Warning: adjust #UVStretchAngle struct accordingly. 
*/ + GPU_vertformat_attr_add(&format, "angle", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(&format, "uv_angles", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + } + return &format; +} + +static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(tls_data)) +{ + GPUVertBuf *refined_vbo = static_cast<GPUVertBuf *>(buffer); + + GPU_vertbuf_init_build_on_device( + refined_vbo, get_edituv_stretch_angle_format_subdiv(), subdiv_cache->num_subdiv_loops); + + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + GPUVertBuf *uvs = cache->final.buff.vbo.uv; + + /* UVs are stored contiguously so we need to compute the offset in the UVs buffer for the active + * UV layer. */ + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_MESH) ? &mr->me->ldata : &mr->bm->ldata; + + uint32_t uv_layers = cache->cd_used.uv; + /* HACK to fix T68857 */ + if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { + int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); + if (layer != -1) { + uv_layers |= (1 << layer); + } + } + + int uvs_offset = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (uv_layers & (1 << i)) { + if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) { + break; + } + + uvs_offset += 1; + } + } + + /* The data is at `offset * num loops`, and we have 2 values per index. 
*/ + uvs_offset *= subdiv_cache->num_subdiv_loops * 2; + + draw_subdiv_build_edituv_stretch_angle_buffer( + subdiv_cache, pos_nor, uvs, uvs_offset, refined_vbo); +} + constexpr MeshExtract create_extractor_edituv_edituv_stretch_angle() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_stretch_angle_init; extractor.iter_poly_bm = extract_edituv_stretch_angle_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_stretch_angle_iter_poly_mesh; + extractor.init_subdiv = extract_edituv_stretch_angle_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_StretchAngle_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc index 3db8cd79af5..3b40b3115f5 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -63,14 +65,12 @@ BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_t return (ratio > 1.0f) ? 
(1.0f / ratio) : ratio; } -static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(data)) +static void compute_area_ratio(const MeshRenderData *mr, + float *r_area_ratio, + float &r_tot_area, + float &r_tot_uv_area) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); float tot_area = 0.0f, tot_uv_area = 0.0f; - float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); if (mr->extract_type == MR_EXTRACT_BMESH) { CustomData *cd_ldata = &mr->bm->ldata; @@ -84,7 +84,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float uvarea = BM_face_calc_area_uv(efa, uv_ofs); tot_area += area; tot_uv_area += uvarea; - area_ratio[f] = area_ratio_get(area, uvarea); + r_area_ratio[f] = area_ratio_get(area, uvarea); } } else { @@ -96,12 +96,22 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float uvarea = BKE_mesh_calc_poly_uv_area(mp, uv_data); tot_area += area; tot_uv_area += uvarea; - area_ratio[mp_index] = area_ratio_get(area, uvarea); + r_area_ratio[mp_index] = area_ratio_get(area, uvarea); } } - cache->tot_area = tot_area; - cache->tot_uv_area = tot_uv_area; + r_tot_area = tot_area; + r_tot_uv_area = tot_uv_area; +} + +static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); + compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area); /* Convert in place to avoid an extra allocation */ uint16_t *poly_stretch = (uint16_t *)area_ratio; @@ -135,11 +145,46 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, MEM_freeN(area_ratio); } +static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, 
+ struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + + /* Initialize final buffer. */ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + + GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops); + + /* Initialize coarse data buffer. */ + + GPUVertBuf *coarse_data = GPU_vertbuf_calloc(); + + /* We use the same format as we just copy data around. */ + GPU_vertbuf_init_with_format(coarse_data, &format); + GPU_vertbuf_data_alloc(coarse_data, mr->loop_len); + + compute_area_ratio(mr, + static_cast<float *>(GPU_vertbuf_get_data(coarse_data)), + cache->tot_area, + cache->tot_uv_area); + + draw_subdiv_build_edituv_stretch_area_buffer(subdiv_cache, coarse_data, vbo); + + GPU_vertbuf_discard(coarse_data); +} + constexpr MeshExtract create_extractor_edituv_stretch_area() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_stretch_area_init; extractor.finish = extract_edituv_stretch_area_finish; + extractor.init_subdiv = extract_edituv_stretch_area_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc index 33f9180e122..f65159f9b95 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc @@ -23,24 +23,40 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ /** \name Extract Face-dots positions * \{ */ -static void extract_fdots_pos_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) 
+static GPUVertFormat *get_fdots_pos_format() { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); } + return &format; +} + +static GPUVertFormat *get_fdots_nor_format_subdiv() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "norAndFlag", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + } + return &format; +} - GPU_vertbuf_init_with_format(vbo, &format); +static void extract_fdots_pos_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat *format = get_fdots_pos_format(); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->poly_len); void *vbo_data = GPU_vertbuf_get_data(vbo); *(float(**)[3])tls_data = static_cast<float(*)[3]>(vbo_data); @@ -97,10 +113,30 @@ static void extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + /* We "extract" positions, normals, and indices at once. 
*/ + GPUVertBuf *fdots_pos_vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertBuf *fdots_nor_vbo = cache->final.buff.vbo.fdots_nor; + GPUIndexBuf *fdots_pos_ibo = cache->final.buff.ibo.fdots; + + GPU_vertbuf_init_build_on_device( + fdots_nor_vbo, get_fdots_nor_format_subdiv(), subdiv_cache->num_coarse_poly); + GPU_vertbuf_init_build_on_device( + fdots_pos_vbo, get_fdots_pos_format(), subdiv_cache->num_coarse_poly); + GPU_indexbuf_init_build_on_device(fdots_pos_ibo, subdiv_cache->num_coarse_poly); + draw_subdiv_build_fdots_buffers(subdiv_cache, fdots_pos_vbo, fdots_nor_vbo, fdots_pos_ibo); +} + constexpr MeshExtract create_extractor_fdots_pos() { MeshExtract extractor = {nullptr}; extractor.init = extract_fdots_pos_init; + extractor.init_subdiv = extract_fdots_init_subdiv; extractor.iter_poly_bm = extract_fdots_pos_iter_poly_bm; extractor.iter_poly_mesh = extract_fdots_pos_iter_poly_mesh; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc index 3c3ac7a7a0a..d30c38ef050 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc @@ -23,6 +23,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -107,10 +109,34 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, } } +static GPUVertFormat *get_subdiv_lnor_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "lnor"); + } + return &format; +} + +static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + 
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + BLI_assert(pos_nor); + GPU_vertbuf_init_build_on_device(vbo, get_subdiv_lnor_format(), subdiv_cache->num_subdiv_loops); + draw_subdiv_build_lnor_buffer(subdiv_cache, pos_nor, vbo); +} + constexpr MeshExtract create_extractor_lnor() { MeshExtract extractor = {nullptr}; extractor.init = extract_lnor_init; + extractor.init_subdiv = extract_lnor_init_subdiv; extractor.iter_poly_bm = extract_lnor_iter_poly_bm; extractor.iter_poly_mesh = extract_lnor_iter_poly_mesh; extractor.data_type = MR_DATA_LOOP_NOR; @@ -210,6 +236,7 @@ constexpr MeshExtract create_extractor_lnor_hq() { MeshExtract extractor = {nullptr}; extractor.init = extract_lnor_hq_init; + extractor.init_subdiv = extract_lnor_init_subdiv; extractor.iter_poly_bm = extract_lnor_hq_iter_poly_bm; extractor.iter_poly_mesh = extract_lnor_hq_iter_poly_mesh; extractor.data_type = MR_DATA_LOOP_NOR; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc index eb9a138590c..00ed4ca6359 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -194,6 +196,123 @@ static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr), MEM_freeN(data->normals); } +static GPUVertFormat *get_pos_nor_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "vnor"); + } + return &format; +} + +static GPUVertFormat *get_normals_format() 
+{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "lnor"); + } + return &format; +} + +static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + const bool do_limit_normals = subdiv_cache->do_limit_normals; + + /* Initialize the vertex buffer, it was already allocated. */ + GPU_vertbuf_init_build_on_device( + vbo, get_pos_nor_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + + draw_subdiv_extract_pos_nor(subdiv_cache, vbo, do_limit_normals); + + if (!do_limit_normals) { + /* We cannot evaluate vertex normals using the limit surface, so compute them manually. */ + GPUVertBuf *subdiv_loop_subdiv_vert_index = draw_subdiv_build_origindex_buffer( + subdiv_cache->subdiv_loop_subdiv_vert_index, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *vertex_normals = GPU_vertbuf_calloc(); + GPU_vertbuf_init_build_on_device( + vertex_normals, get_normals_format(), subdiv_cache->num_subdiv_verts); + + draw_subdiv_accumulate_normals(subdiv_cache, + vbo, + subdiv_cache->subdiv_vertex_face_adjacency_offsets, + subdiv_cache->subdiv_vertex_face_adjacency, + vertex_normals); + + draw_subdiv_finalize_normals(subdiv_cache, vertex_normals, subdiv_loop_subdiv_vert_index, vbo); + + GPU_vertbuf_discard(vertex_normals); + GPU_vertbuf_discard(subdiv_loop_subdiv_vert_index); + } +} + +static void extract_pos_nor_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + const Mesh 
*coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + const MVert *coarse_verts = coarse_mesh->mvert; + uint offset = subdiv_cache->num_subdiv_loops; + + /* TODO(kevindietrich) : replace this when compressed normals are supported. */ + struct SubdivPosNorLoop { + float pos[3]; + float nor[3]; + float flag; + }; + + SubdivPosNorLoop edge_data[2]; + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + const MVert *loose_vert1 = &coarse_verts[loose_edge->v1]; + const MVert *loose_vert2 = &coarse_verts[loose_edge->v2]; + + copy_v3_v3(edge_data[0].pos, loose_vert1->co); + normal_short_to_float_v3(edge_data[0].nor, loose_vert1->no); + edge_data[0].flag = 0.0f; + + copy_v3_v3(edge_data[1].pos, loose_vert2->co); + normal_short_to_float_v3(edge_data[1].nor, loose_vert2->no); + edge_data[1].flag = 0.0f; + + GPU_vertbuf_update_sub( + vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop) * 2, &edge_data); + + offset += 2; + } + + SubdivPosNorLoop vert_data; + vert_data.flag = 0.0f; + for (int i = 0; i < loose_geom->vert_len; i++) { + const MVert *loose_vertex = &coarse_verts[loose_geom->verts[i]]; + + copy_v3_v3(vert_data.pos, loose_vertex->co); + normal_short_to_float_v3(vert_data.nor, loose_vertex->no); + + GPU_vertbuf_update_sub( + vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop), &vert_data); + + offset += 1; + } +} + constexpr MeshExtract create_extractor_pos_nor() { MeshExtract extractor = {nullptr}; @@ -205,6 +324,8 @@ constexpr MeshExtract create_extractor_pos_nor() extractor.iter_lvert_bm = extract_pos_nor_iter_lvert_bm; extractor.iter_lvert_mesh = extract_pos_nor_iter_lvert_mesh; extractor.finish = extract_pos_nor_finish; + extractor.init_subdiv = extract_pos_nor_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_pos_nor_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_PosNor_Data); 
extractor.use_threading = true; @@ -391,6 +512,7 @@ constexpr MeshExtract create_extractor_pos_nor_hq() { MeshExtract extractor = {nullptr}; extractor.init = extract_pos_nor_hq_init; + extractor.init_subdiv = extract_pos_nor_init_subdiv; extractor.iter_poly_bm = extract_pos_nor_hq_iter_poly_bm; extractor.iter_poly_mesh = extract_pos_nor_hq_iter_poly_mesh; extractor.iter_ledge_bm = extract_pos_nor_hq_iter_ledge_bm; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc index fd91bc5258f..753fbe7e0e2 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc @@ -27,6 +27,7 @@ #include "BKE_paint.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -35,13 +36,23 @@ namespace blender::draw { /** \name Extract Sculpt Data * \{ */ +static GPUVertFormat *get_sculpt_data_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + return &format; +} + static void extract_sculpt_data_init(const MeshRenderData *mr, struct MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; + GPUVertFormat *format = get_sculpt_data_format(); CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? 
&mr->bm->vdata : &mr->me->vdata; @@ -50,12 +61,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS); - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - } - - GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->loop_len); struct gpuSculptData { @@ -121,10 +127,99 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, } } +static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + Mesh *coarse_mesh = mr->me; + CustomData *cd_vdata = &coarse_mesh->vdata; + CustomData *cd_pdata = &coarse_mesh->pdata; + + /* First, interpolate mask if available. 
*/ + GPUVertBuf *mask_vbo = nullptr; + GPUVertBuf *subdiv_mask_vbo = nullptr; + float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); + + if (cd_mask) { + GPUVertFormat mask_format = {0}; + GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + + mask_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(mask_vbo, &mask_format); + GPU_vertbuf_data_alloc(mask_vbo, coarse_mesh->totloop); + float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo)); + + for (int i = 0; i < coarse_mesh->totpoly; i++) { + const MPoly *mpoly = &coarse_mesh->mpoly[i]; + + for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; + loop_index++) { + const MLoop *ml = &coarse_mesh->mloop[loop_index]; + *v_mask++ = cd_mask[ml->v]; + } + } + + subdiv_mask_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_build_on_device( + subdiv_mask_vbo, &mask_format, subdiv_cache->num_subdiv_loops); + + draw_subdiv_interp_custom_data(subdiv_cache, mask_vbo, subdiv_mask_vbo, 1, 0); + } + + /* Then, gather face sets. 
*/ + GPUVertFormat face_set_format = {0}; + GPU_vertformat_attr_add(&face_set_format, "msk", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + + GPUVertBuf *face_set_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(face_set_vbo, &face_set_format); + GPU_vertbuf_data_alloc(face_set_vbo, subdiv_cache->num_subdiv_loops); + + struct gpuFaceSet { + uint8_t color[4]; + }; + + gpuFaceSet *face_sets = (gpuFaceSet *)GPU_vertbuf_get_data(face_set_vbo); + int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS); + + GPUVertFormat *format = get_sculpt_data_format(); + GPU_vertbuf_init_build_on_device(vbo, format, subdiv_cache->num_subdiv_loops); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int mp_index = subdiv_loop_poly_index[i]; + + uchar face_set_color[4] = {UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX}; + if (cd_face_set) { + const int face_set_id = cd_face_set[mp_index]; + /* Skip for the default color Face Set to render it white. */ + if (face_set_id != coarse_mesh->face_sets_color_default) { + BKE_paint_face_set_overlay_color_get( + face_set_id, coarse_mesh->face_sets_color_seed, face_set_color); + } + } + copy_v3_v3_uchar(face_sets->color, face_set_color); + face_sets++; + } + + /* Finally, interleave mask and face sets. 
*/ + draw_subdiv_build_sculpt_data_buffer(subdiv_cache, subdiv_mask_vbo, face_set_vbo, vbo); + + if (mask_vbo) { + GPU_vertbuf_discard(mask_vbo); + GPU_vertbuf_discard(subdiv_mask_vbo); + } + GPU_vertbuf_discard(face_set_vbo); +} + constexpr MeshExtract create_extractor_sculpt_data() { MeshExtract extractor = {nullptr}; extractor.init = extract_sculpt_data_init; + extractor.init_subdiv = extract_sculpt_data_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc index 5ac30dd3be9..33c27b45627 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc @@ -21,6 +21,7 @@ * \ingroup draw */ +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -196,12 +197,104 @@ static void extract_vert_idx_iter_lvert_mesh(const MeshRenderData *mr, (*(uint32_t **)data)[offset + lvert_index] = v_orig; } +static void extract_vert_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + /* Each element points to an element in the ibo.points. 
*/ + draw_subdiv_init_origindex_buffer(vbo, + subdiv_cache->subdiv_loop_subdiv_vert_index, + subdiv_cache->num_subdiv_loops, + mr->loop_loose_len); +} + +static void extract_vert_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo); + const Mesh *coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + vert_idx_data[offset] = loose_edge->v1; + vert_idx_data[offset + 1] = loose_edge->v2; + offset += 2; + } + + for (int i = 0; i < loose_geom->vert_len; i++) { + vert_idx_data[offset] = loose_geom->verts[i]; + offset += 1; + } +} + +static void extract_edge_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + draw_subdiv_init_origindex_buffer( + vbo, + static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)), + subdiv_cache->num_subdiv_loops, + mr->edge_loose_len * 2); +} + +static void extract_edge_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo); + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i 
= 0; i < loose_geom->edge_len; i++) { + vert_idx_data[offset] = loose_geom->edges[i]; + vert_idx_data[offset + 1] = loose_geom->edges[i]; + offset += 2; + } +} + +static void extract_poly_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + draw_subdiv_init_origindex_buffer( + vbo, subdiv_cache->subdiv_loop_poly_index, subdiv_cache->num_subdiv_loops, 0); +} + constexpr MeshExtract create_extractor_poly_idx() { MeshExtract extractor = {nullptr}; extractor.init = extract_select_idx_init; extractor.iter_poly_bm = extract_poly_idx_iter_poly_bm; extractor.iter_poly_mesh = extract_poly_idx_iter_poly_mesh; + extractor.init_subdiv = extract_poly_idx_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; @@ -217,6 +310,8 @@ constexpr MeshExtract create_extractor_edge_idx() extractor.iter_poly_mesh = extract_edge_idx_iter_poly_mesh; extractor.iter_ledge_bm = extract_edge_idx_iter_ledge_bm; extractor.iter_ledge_mesh = extract_edge_idx_iter_ledge_mesh; + extractor.init_subdiv = extract_edge_idx_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_edge_idx_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; @@ -234,6 +329,8 @@ constexpr MeshExtract create_extractor_vert_idx() extractor.iter_ledge_mesh = extract_vert_idx_iter_ledge_mesh; extractor.iter_lvert_bm = extract_vert_idx_iter_lvert_bm; extractor.iter_lvert_mesh = extract_vert_idx_iter_lvert_mesh; + extractor.init_subdiv = extract_vert_idx_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_vert_idx_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index af279b08a59..6e9d8ef6926 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -23,6 +23,7 @@ #include "BLI_string.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -31,25 +32,27 @@ namespace blender::draw { /** \name Extract UV layers * \{ */ -static void extract_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data)) +/* Initialize the vertex format to be used for UVs. Return true if any UV layer is + * found, false otherwise. */ +static bool mesh_extract_uv_format_init(GPUVertFormat *format, + struct MeshBatchCache *cache, + CustomData *cd_ldata, + eMRExtractType extract_type, + uint32_t &r_uv_layers) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - GPU_vertformat_deinterleave(&format); + GPU_vertformat_deinterleave(format); - CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; uint32_t uv_layers = cache->cd_used.uv; /* HACK to fix T68857 */ - if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { + if (extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); if (layer != -1) { uv_layers |= (1 << layer); } } + r_uv_layers = uv_layers; + for (int i = 0; i < MAX_MTFACE; i++) { if (uv_layers & (1 << i)) { char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; @@ -58,30 +61,47 @@ static void extract_uv_init(const MeshRenderData *mr, GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); /* UV layer name. 
*/ BLI_snprintf(attr_name, sizeof(attr_name), "u%s", attr_safe_name); - GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT); /* Auto layer name. */ BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_alias_add(&format, attr_name); + GPU_vertformat_alias_add(format, attr_name); /* Active render layer name. */ if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "u"); + GPU_vertformat_alias_add(format, "u"); } /* Active display layer name. */ if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "au"); + GPU_vertformat_alias_add(format, "au"); /* Alias to `pos` for edit uvs. */ - GPU_vertformat_alias_add(&format, "pos"); + GPU_vertformat_alias_add(format, "pos"); } /* Stencil mask uv layer name. */ if (i == CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "mu"); + GPU_vertformat_alias_add(format, "mu"); } } } + if (format->attr_len == 0) { + GPU_vertformat_attr_add(format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + return false; + } + + return true; +} + +static void extract_uv_init(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(tls_data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat format = {0}; + + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; int v_len = mr->loop_len; - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + uint32_t uv_layers = cache->cd_used.uv; + if (!mesh_extract_uv_format_init(&format, cache, cd_ldata, mr->extract_type, uv_layers)) { /* VBO will not be used, only allocate minimum of memory. 
*/ v_len = 1; } @@ -116,10 +136,45 @@ static void extract_uv_init(const MeshRenderData *mr, } } +static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + Mesh *coarse_mesh = subdiv_cache->mesh; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertFormat format = {0}; + + uint v_len = subdiv_cache->num_subdiv_loops; + uint uv_layers; + if (!mesh_extract_uv_format_init( + &format, cache, &coarse_mesh->ldata, MR_EXTRACT_MESH, uv_layers)) { + // TODO(kevindietrich): handle this more gracefully. + v_len = 1; + } + + GPU_vertbuf_init_build_on_device(vbo, &format, v_len); + + if (uv_layers == 0) { + return; + } + + /* Index of the UV layer in the compact buffer. Used UV layers are stored in a single buffer. */ + int pack_layer_index = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (uv_layers & (1 << i)) { + const int offset = (int)subdiv_cache->num_subdiv_loops * pack_layer_index++; + draw_subdiv_extract_uvs(subdiv_cache, vbo, i, offset); + } + } +} + constexpr MeshExtract create_extractor_uv() { MeshExtract extractor = {nullptr}; extractor.init = extract_uv_init; + extractor.init_subdiv = extract_uv_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc index f8878eb2617..ea7810bcf6b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc @@ -25,6 +25,7 @@ #include "BLI_string.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -33,17 +34,14 @@ namespace blender::draw { /** \name Extract VCol * \{ */ -static void extract_vcol_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - 
void *UNUSED(tls_data)) +/* Initialize the common vertex format for vcol for coarse and subdivided meshes. */ +static void init_vcol_format(GPUVertFormat *format, + const MeshBatchCache *cache, + CustomData *cd_ldata) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - GPU_vertformat_deinterleave(&format); + GPU_vertformat_deinterleave(format); - CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; - uint32_t vcol_layers = cache->cd_used.vcol; + const uint32_t vcol_layers = cache->cd_used.vcol; for (int i = 0; i < MAX_MCOL; i++) { if (vcol_layers & (1 << i)) { @@ -52,31 +50,56 @@ static void extract_vcol_init(const MeshRenderData *mr, GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); BLI_snprintf(attr_name, sizeof(attr_name), "c%s", attr_safe_name); - GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPCOL)) { - GPU_vertformat_alias_add(&format, "c"); + GPU_vertformat_alias_add(format, "c"); } if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL)) { - GPU_vertformat_alias_add(&format, "ac"); + GPU_vertformat_alias_add(format, "ac"); } /* Gather number of auto layers. */ /* We only do `vcols` that are not overridden by `uvs`. */ if (CustomData_get_named_layer_index(cd_ldata, CD_MLOOPUV, layer_name) == -1) { BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_alias_add(&format, attr_name); + GPU_vertformat_alias_add(format, attr_name); } } } +} + +/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision + * case. 
*/ +static GPUVertFormat *get_coarse_vcol_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "c"); + GPU_vertformat_alias_add(&format, "ac"); + } + return &format; +} + +using gpuMeshVcol = struct gpuMeshVcol { + ushort r, g, b, a; +}; + +static void extract_vcol_init(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(tls_data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat format = {0}; + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; + const uint32_t vcol_layers = cache->cd_used.vcol; + init_vcol_format(&format, cache, cd_ldata); GPU_vertbuf_init_with_format(vbo, &format); GPU_vertbuf_data_alloc(vbo, mr->loop_len); - using gpuMeshVcol = struct gpuMeshVcol { - ushort r, g, b, a; - }; - gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo); for (int i = 0; i < MAX_MCOL; i++) { @@ -111,10 +134,64 @@ static void extract_vcol_init(const MeshRenderData *mr, } } +static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); + Mesh *coarse_mesh = subdiv_cache->mesh; + + GPUVertFormat format = {0}; + init_vcol_format(&format, cache, &coarse_mesh->ldata); + + GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *src_data = GPU_vertbuf_calloc(); + /* Dynamic as we upload and interpolate layers one at a time. 
*/ + GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC); + + GPU_vertbuf_data_alloc(src_data, coarse_mesh->totloop); + + gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data); + + const CustomData *cd_ldata = &coarse_mesh->ldata; + + const uint vcol_layers = cache->cd_used.vcol; + + /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in + * a single buffer. */ + int pack_layer_index = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (vcol_layers & (1 << i)) { + /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */ + const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++; + const MLoopCol *mloopcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i); + + gpuMeshVcol *vcol = mesh_vcol; + + for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, mloopcol++) { + vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]); + vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]); + vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]); + vcol->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f)); + } + + /* Ensure data is uploaded properly. 
*/ + GPU_vertbuf_tag_dirty(src_data); + draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset); + } + } + + GPU_vertbuf_discard(src_data); +} + constexpr MeshExtract create_extractor_vcol() { MeshExtract extractor = {nullptr}; extractor.init = extract_vcol_init; + extractor.init_subdiv = extract_vcol_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc index bdb1410a755..bb8853b8154 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc @@ -25,6 +25,7 @@ #include "BKE_deform.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -167,10 +168,57 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + Mesh *coarse_mesh = subdiv_cache->mesh; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "weight", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *coarse_weights = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(coarse_weights, &format); + GPU_vertbuf_data_alloc(coarse_weights, coarse_mesh->totloop); + float *coarse_weights_data = static_cast<float *>(GPU_vertbuf_get_data(coarse_weights)); + + const DRW_MeshWeightState *wstate = &cache->weight_state; + const MDeformVert *dverts = static_cast<const MDeformVert *>( + CustomData_get_layer(&coarse_mesh->vdata, CD_MDEFORMVERT)); + + for (int i = 0; i 
< coarse_mesh->totpoly; i++) { + const MPoly *mpoly = &coarse_mesh->mpoly[i]; + + for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; + loop_index++) { + const MLoop *ml = &coarse_mesh->mloop[loop_index]; + + if (dverts != nullptr) { + const MDeformVert *dvert = &dverts[ml->v]; + coarse_weights_data[loop_index] = evaluate_vertex_weight(dvert, wstate); + } + else { + coarse_weights_data[loop_index] = evaluate_vertex_weight(nullptr, wstate); + } + } + } + + draw_subdiv_interp_custom_data(subdiv_cache, coarse_weights, vbo, 1, 0); + + GPU_vertbuf_discard(coarse_weights); +} + constexpr MeshExtract create_extractor_weights() { MeshExtract extractor = {nullptr}; extractor.init = extract_weights_init; + extractor.init_subdiv = extract_weights_init_subdiv; extractor.iter_poly_bm = extract_weights_iter_poly_bm; extractor.iter_poly_mesh = extract_weights_iter_poly_mesh; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl new file mode 100644 index 00000000000..36c3970d9a0 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl @@ -0,0 +1,230 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly restrict buffer sourceBuffer +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint src_data[]; +#else + float src_data[]; +#endif +}; + +layout(std430, binding = 2) readonly restrict buffer facePTexOffset +{ + uint face_ptex_offset[]; +}; + +layout(std430, binding = 3) readonly restrict buffer patchCoords +{ + BlenderPatchCoord patch_coords[]; +}; + +layout(std430, binding = 4) readonly restrict buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; + +layout(std430, binding = 5) writeonly restrict buffer destBuffer +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint dst_data[]; +#else + float dst_data[]; +#endif +}; + +struct 
Vertex { + float vertex_data[DIMENSIONS]; +}; + +void clear(inout Vertex v) +{ + for (int i = 0; i < DIMENSIONS; i++) { + v.vertex_data[i] = 0.0; + } +} + +Vertex read_vertex(uint index) +{ + Vertex result; +#ifdef GPU_FETCH_U16_TO_FLOAT + uint base_index = index * 2; + if (DIMENSIONS == 4) { + uint xy = src_data[base_index]; + uint zw = src_data[base_index + 1]; + + float x = float((xy >> 16) & 0xffff) / 65535.0; + float y = float(xy & 0xffff) / 65535.0; + float z = float((zw >> 16) & 0xffff) / 65535.0; + float w = float(zw & 0xffff) / 65535.0; + + result.vertex_data[0] = x; + result.vertex_data[1] = y; + result.vertex_data[2] = z; + result.vertex_data[3] = w; + } + else { + /* This case is unsupported for now. */ + clear(result); + } +#else + uint base_index = index * DIMENSIONS; + for (int i = 0; i < DIMENSIONS; i++) { + result.vertex_data[i] = src_data[base_index + i]; + } +#endif + return result; +} + +void write_vertex(uint index, Vertex v) +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint base_index = dst_offset + index * 2; + if (DIMENSIONS == 4) { + uint x = uint(v.vertex_data[0] * 65535.0); + uint y = uint(v.vertex_data[1] * 65535.0); + uint z = uint(v.vertex_data[2] * 65535.0); + uint w = uint(v.vertex_data[3] * 65535.0); + + uint xy = x << 16 | y; + uint zw = z << 16 | w; + + dst_data[base_index] = xy; + dst_data[base_index + 1] = zw; + } + else { + /* This case is unsupported for now. 
*/ + dst_data[base_index] = 0; + } +#else + uint base_index = dst_offset + index * DIMENSIONS; + for (int i = 0; i < DIMENSIONS; i++) { + dst_data[base_index + i] = v.vertex_data[i]; + } +#endif +} + +Vertex interp_vertex(Vertex v0, Vertex v1, Vertex v2, Vertex v3, vec2 uv) +{ + Vertex result; + for (int i = 0; i < DIMENSIONS; i++) { + float e = mix(v0.vertex_data[i], v1.vertex_data[i], uv.x); + float f = mix(v2.vertex_data[i], v3.vertex_data[i], uv.x); + result.vertex_data[i] = mix(e, f, uv.y); + } + return result; +} + +void add_with_weight(inout Vertex v0, Vertex v1, float weight) +{ + for (int i = 0; i < DIMENSIONS; i++) { + v0.vertex_data[i] += v1.vertex_data[i] * weight; + } +} + +Vertex average(Vertex v0, Vertex v1) +{ + Vertex result; + for (int i = 0; i < DIMENSIONS; i++) { + result.vertex_data[i] = (v0.vertex_data[i] + v1.vertex_data[i]) * 0.5; + } + return result; +} + +uint get_vertex_count(uint coarse_polygon) +{ + uint number_of_patches = face_ptex_offset[coarse_polygon + 1] - face_ptex_offset[coarse_polygon]; + if (number_of_patches == 1) { + /* If there is only one patch for the current coarse polygon, then it is a quad. */ + return 4; + } + /* Otherwise, the number of patches is the number of vertices. */ + return number_of_patches; +} + +uint get_polygon_corner_index(uint coarse_polygon, uint patch_index) +{ + uint patch_offset = face_ptex_offset[coarse_polygon]; + return patch_index - patch_offset; +} + +uint get_loop_start(uint coarse_polygon) +{ + return extra_coarse_face_data[coarse_polygon] & coarse_face_loopstart_mask; +} + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + /* Find which coarse polygon we came from. 
*/ + uint coarse_polygon = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count); + uint loop_start = get_loop_start(coarse_polygon); + + /* Find the number of vertices for the coarse polygon. */ + Vertex v0, v1, v2, v3; + clear(v0); + clear(v1); + clear(v2); + clear(v3); + + uint number_of_vertices = get_vertex_count(coarse_polygon); + if (number_of_vertices == 4) { + /* Interpolate the src data. */ + v0 = read_vertex(loop_start + 0); + v1 = read_vertex(loop_start + 1); + v2 = read_vertex(loop_start + 2); + v3 = read_vertex(loop_start + 3); + } + else { + /* Interpolate the src data for the center. */ + uint loop_end = loop_start + number_of_vertices - 1; + Vertex center_value; + clear(center_value); + + float weight = 1.0 / float(number_of_vertices); + + for (uint l = loop_start; l < loop_end; l++) { + add_with_weight(center_value, read_vertex(l), weight); + } + + /* Interpolate between the previous and next corner for the middle values for the edges. */ + uint patch_index = uint(patch_coords[start_loop_index].patch_index); + uint current_coarse_corner = get_polygon_corner_index(coarse_polygon, patch_index); + uint next_coarse_corner = (current_coarse_corner + 1) % number_of_vertices; + uint prev_coarse_corner = (current_coarse_corner + number_of_vertices - 1) % + number_of_vertices; + + v0 = read_vertex(loop_start); + v1 = average(v0, read_vertex(loop_start + next_coarse_corner)); + v3 = average(v0, read_vertex(loop_start + prev_coarse_corner)); + + /* Interpolate between the current value, and the ones for the center and mid-edges. */ + v2 = center_value; + } + + /* Do a linear interpolation of the data based on the UVs for each loop of this subdivided quad. 
+ */ + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + BlenderPatchCoord co = patch_coords[loop_index]; + vec2 uv = decode_uv(co.encoded_uv); + /* NOTE: v2 and v3 are reversed to stay consistent with the interpolation weight on the x-axis: + * + * v3 +-----+ v2 + * | | + * | | + * v0 +-----+ v1 + * + * otherwise, weight would be `1.0 - uv.x` for `v2 <-> v3`, but `uv.x` for `v0 <-> v1`. + */ + Vertex result = interp_vertex(v0, v1, v3, v2, uv); + write_vertex(loop_index, result); + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl new file mode 100644 index 00000000000..f11c0f6427e --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl @@ -0,0 +1,57 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputEdgeOrigIndex +{ + int input_origindex[]; +}; + +layout(std430, binding = 1) writeonly buffer outputLinesIndices +{ + uint output_lines[]; +}; + +#ifndef LINES_LOOSE +void emit_line(uint line_offset, uint start_loop_index, uint corner_index) +{ + uint vertex_index = start_loop_index + corner_index; + + if (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display) { + output_lines[line_offset + 0] = 0xffffffff; + output_lines[line_offset + 1] = 0xffffffff; + } + else { + /* Mod 4 so we loop back at the first vertex on the last loop index (3). */ + uint next_vertex_index = start_loop_index + (corner_index + 1) % 4; + + output_lines[line_offset + 0] = vertex_index; + output_lines[line_offset + 1] = next_vertex_index; + } +} +#endif + +void main() +{ + uint index = get_global_invocation_index(); + if (index >= total_dispatch_size) { + return; + } + +#ifdef LINES_LOOSE + /* In the loose lines case, we execute for each line, with two vertices per line. 
*/ + uint line_offset = edge_loose_offset + index * 2; + uint loop_index = num_subdiv_loops + index * 2; + output_lines[line_offset] = loop_index; + output_lines[line_offset + 1] = loop_index + 1; +#else + /* We execute for each quad, so the start index of the loop is quad_index * 4. */ + uint start_loop_index = index * 4; + /* We execute for each quad, so the start index of the line is quad_index * 8 (with 2 vertices + * per line). */ + uint start_line_index = index * 8; + + for (int i = 0; i < 4; i++) { + emit_line(start_line_index + i * 2, start_loop_index, i); + } +#endif +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl new file mode 100644 index 00000000000..3257ebdae17 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -0,0 +1,43 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +/* Generate triangles from subdivision quads indices. 
*/ + +layout(std430, binding = 1) writeonly buffer outputTriangles +{ + uint output_tris[]; +}; + +#ifndef SINGLE_MATERIAL +layout(std430, binding = 2) readonly buffer inputPolygonMatOffset +{ + int polygon_mat_offset[]; +}; +#endif + +void main() +{ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint loop_index = quad_index * 4; + +#ifdef SINGLE_MATERIAL + uint triangle_loop_index = quad_index * 6; +#else + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + int mat_offset = polygon_mat_offset[coarse_quad_index]; + + int triangle_loop_index = (int(quad_index) + mat_offset) * 6; +#endif + + output_tris[triangle_loop_index + 0] = loop_index + 0; + output_tris[triangle_loop_index + 1] = loop_index + 1; + output_tris[triangle_loop_index + 2] = loop_index + 2; + output_tris[triangle_loop_index + 3] = loop_index + 0; + output_tris[triangle_loop_index + 4] = loop_index + 2; + output_tris[triangle_loop_index + 5] = loop_index + 3; +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl new file mode 100644 index 00000000000..005561964b8 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl @@ -0,0 +1,176 @@ + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +/* Uniform block for #DRWSubivUboStorage. */ +layout(std140) uniform shader_data +{ + /* Offsets in the buffers data where the source and destination data start. */ + int src_offset; + int dst_offset; + + /* Parameters for the DRWPatchMap. */ + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; + + /* Coarse topology information. */ + int coarse_poly_count; + uint edge_loose_offset; + + /* Subdiv topology information. */ + uint num_subdiv_loops; + + /* Subdivision settings. */ + bool optimal_display; + + /* Sculpt data. 
*/ + bool has_sculpt_mask; + + /* Masks for the extra coarse face data. */ + uint coarse_face_select_mask; + uint coarse_face_smooth_mask; + uint coarse_face_active_mask; + uint coarse_face_loopstart_mask; + + /* Total number of elements to process. */ + uint total_dispatch_size; +}; + +uint get_global_invocation_index() +{ + uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x; + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row; +} + +/* Structure for #CompressedPatchCoord. */ +struct BlenderPatchCoord { + int patch_index; + uint encoded_uv; +}; + +vec2 decode_uv(uint encoded_uv) +{ + float u = float((encoded_uv >> 16) & 0xFFFFu) / 65535.0; + float v = float(encoded_uv & 0xFFFFu) / 65535.0; + return vec2(u, v); +} + +/* This structure is a carbon copy of OpenSubDiv's PatchTable::PatchHandle. */ +struct PatchHandle { + int array_index; + int patch_index; + int vertex_index; +}; + +/* This structure is a carbon copy of OpenSubDiv's PatchCoord. */ +struct PatchCoord { + int array_index; + int patch_index; + int vertex_index; + float u; + float v; +}; + +/* This structure is a carbon copy of OpenSubDiv's PatchCoord.QuadNode. + * Each child is a bitfield. */ +struct QuadNode { + uvec4 child; +}; + +bool is_set(uint i) +{ + /* QuadNode.Child.isSet is the first bit of the bitfield. */ + return (i & 0x1u) != 0; +} + +bool is_leaf(uint i) +{ + /* QuadNode.Child.isLeaf is the second bit of the bitfield. */ + return (i & 0x2u) != 0; +} + +uint get_index(uint i) +{ + /* QuadNode.Child.index is made of the remaining bits. */ + return (i >> 2) & 0x3FFFFFFFu; +} + +/* Duplicate of #PosNorLoop from the mesh extract CPU code. + * We do not use a vec3 for the position as it will be padded to a vec4 which is incompatible with + * the format. */ +struct PosNorLoop { + float x, y, z; + /* TODO(kevindietrich) : figure how to compress properly as GLSL does not have char/short types, + * bit operations get tricky. 
*/ + float nx, ny, nz; + float flag; +}; + +vec3 get_vertex_pos(PosNorLoop vertex_data) +{ + return vec3(vertex_data.x, vertex_data.y, vertex_data.z); +} + +vec3 get_vertex_nor(PosNorLoop vertex_data) +{ + return vec3(vertex_data.nx, vertex_data.ny, vertex_data.nz); +} + +void set_vertex_pos(inout PosNorLoop vertex_data, vec3 pos) +{ + vertex_data.x = pos.x; + vertex_data.y = pos.y; + vertex_data.z = pos.z; +} + +void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor, uint flag) +{ + vertex_data.nx = nor.x; + vertex_data.ny = nor.y; + vertex_data.nz = nor.z; + vertex_data.flag = float(flag); +} + +/* Set the vertex normal but preserve the existing flag. This is for when we manually compute the + * vertex normals when we cannot use the limit surface, in which case the flag and the normal are + * set by two separate compute passes. */ +void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor) +{ + set_vertex_nor(vertex_data, nor, 0); +} + +#define ORIGINDEX_NONE -1 + +#ifdef SUBDIV_POLYGON_OFFSET +layout(std430, binding = 0) readonly buffer inputSubdivPolygonOffset +{ + uint subdiv_polygon_offset[]; +}; + +/* Given the index of the subdivision quad, return the index of the corresponding coarse polygon. + * This uses subdiv_polygon_offset and since it is a growing list of offsets, we can use binary + * search to locate the right index. 
*/ +uint coarse_polygon_index_from_subdiv_quad_index(uint subdiv_quad_index, uint coarse_poly_count) +{ + uint first = 0; + uint last = coarse_poly_count; + + while (first != last) { + uint middle = (first + last) / 2; + + if (subdiv_polygon_offset[middle] < subdiv_quad_index) { + first = middle + 1; + } + else { + last = middle; + } + } + + if (subdiv_polygon_offset[first] == subdiv_quad_index) { + return first; + } + + return first - 1; +} +#endif diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl new file mode 100644 index 00000000000..575090472b1 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl @@ -0,0 +1,56 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer faceAdjacencyOffsets +{ + uint face_adjacency_offsets[]; +}; + +layout(std430, binding = 2) readonly buffer faceAdjacencyLists +{ + uint face_adjacency_lists[]; +}; + +layout(std430, binding = 3) writeonly buffer vertexNormals +{ + vec3 normals[]; +}; + +void main() +{ + uint vertex_index = get_global_invocation_index(); + if (vertex_index >= total_dispatch_size) { + return; + } + + uint first_adjacent_face_offset = face_adjacency_offsets[vertex_index]; + uint number_of_adjacent_faces = face_adjacency_offsets[vertex_index + 1] - + first_adjacent_face_offset; + + vec3 accumulated_normal = vec3(0.0); + + /* For each adjacent face. */ + for (uint i = 0; i < number_of_adjacent_faces; i++) { + uint adjacent_face = face_adjacency_lists[first_adjacent_face_offset + i]; + uint start_loop_index = adjacent_face * 4; + + /* Compute face normal. 
*/ + vec3 adjacent_verts[3]; + for (uint j = 0; j < 3; j++) { + adjacent_verts[j] = get_vertex_pos(pos_nor[start_loop_index + j]); + } + + vec3 face_normal = normalize( + cross(adjacent_verts[1] - adjacent_verts[0], adjacent_verts[2] - adjacent_verts[0])); + accumulated_normal += face_normal; + } + + float weight = 1.0 / float(number_of_adjacent_faces); + vec3 normal = normalize(accumulated_normal); + normals[vertex_index] = normal; +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl new file mode 100644 index 00000000000..84cd65d4161 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl @@ -0,0 +1,34 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputNormals +{ + vec3 vertex_normals[]; +}; + +layout(std430, binding = 1) readonly buffer inputSubdivVertLoopMap +{ + uint vert_loop_map[]; +}; + +layout(std430, binding = 2) buffer outputPosNor +{ + PosNorLoop pos_nor[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (int i = 0; i < 4; i++) { + uint subdiv_vert_index = vert_loop_map[start_loop_index + i]; + vec3 nor = vertex_normals[subdiv_vert_index]; + set_vertex_nor(pos_nor[start_loop_index + i], nor); + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl new file mode 100644 index 00000000000..5dd7decf663 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl @@ -0,0 +1,416 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +/* Source buffer. 
*/ +layout(std430, binding = 0) buffer src_buffer +{ + float srcVertexBuffer[]; +}; + +/* #DRWPatchMap */ +layout(std430, binding = 1) readonly buffer inputPatchHandles +{ + PatchHandle input_patch_handles[]; +}; + +layout(std430, binding = 2) readonly buffer inputQuadNodes +{ + QuadNode quad_nodes[]; +}; + +layout(std430, binding = 3) readonly buffer inputPatchCoords +{ + BlenderPatchCoord patch_coords[]; +}; + +layout(std430, binding = 4) readonly buffer inputVertOrigIndices +{ + int input_vert_origindex[]; +}; + +/* Patch buffers. */ +layout(std430, binding = 5) buffer patchArray_buffer +{ + OsdPatchArray patchArrayBuffer[]; +}; + +layout(std430, binding = 6) buffer patchIndex_buffer +{ + int patchIndexBuffer[]; +}; + +layout(std430, binding = 7) buffer patchParam_buffer +{ + OsdPatchParam patchParamBuffer[]; +}; + + /* Output buffer(s). */ + +#if defined(FVAR_EVALUATION) +layout(std430, binding = 8) writeonly buffer outputFVarData +{ + vec2 output_fvar[]; +}; +#elif defined(FDOTS_EVALUATION) +/* For face dots, we build the position, normals, and index buffers in one go. */ + +/* vec3 is padded to vec4, but the format used for fdots does not have any padding. */ +struct FDotVert { + float x, y, z; +}; + +/* Same here, do not use vec3. 
*/ +struct FDotNor { + float x, y, z; + float flag; +}; + +layout(std430, binding = 8) writeonly buffer outputVertices +{ + FDotVert output_verts[]; +}; + +layout(std430, binding = 9) writeonly buffer outputNormals +{ + FDotNor output_nors[]; +}; + +layout(std430, binding = 10) writeonly buffer outputFdotsIndices +{ + uint output_indices[]; +}; + +layout(std430, binding = 11) readonly buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; +#else +layout(std430, binding = 8) writeonly buffer outputVertexData +{ + PosNorLoop output_verts[]; +}; +#endif + +vec2 read_vec2(int index) +{ + vec2 result; + result.x = srcVertexBuffer[index * 2]; + result.y = srcVertexBuffer[index * 2 + 1]; + return result; +} + +vec3 read_vec3(int index) +{ + vec3 result; + result.x = srcVertexBuffer[index * 3]; + result.y = srcVertexBuffer[index * 3 + 1]; + result.z = srcVertexBuffer[index * 3 + 2]; + return result; +} + +OsdPatchArray GetPatchArray(int arrayIndex) +{ + return patchArrayBuffer[arrayIndex]; +} + +OsdPatchParam GetPatchParam(int patchIndex) +{ + return patchParamBuffer[patchIndex]; +} + +/* ------------------------------------------------------------------------------ + * Patch Coordinate lookup. Return an OsdPatchCoord for the given patch_index and uvs. + * This code is a port of the OpenSubdiv PatchMap lookup code. + */ + +PatchHandle bogus_patch_handle() +{ + PatchHandle ret; + ret.array_index = -1; + ret.vertex_index = -1; + ret.patch_index = -1; + return ret; +} + +int transformUVToQuadQuadrant(float median, inout float u, inout float v) +{ + int uHalf = (u >= median) ? 1 : 0; + if (uHalf != 0) + u -= median; + + int vHalf = (v >= median) ? 
1 : 0; + if (vHalf != 0) + v -= median; + + return (vHalf << 1) | uHalf; +} + +int transformUVToTriQuadrant(float median, inout float u, inout float v, inout bool rotated) +{ + + if (!rotated) { + if (u >= median) { + u -= median; + return 1; + } + if (v >= median) { + v -= median; + return 2; + } + if ((u + v) >= median) { + rotated = true; + return 3; + } + return 0; + } + else { + if (u < median) { + v -= median; + return 1; + } + if (v < median) { + u -= median; + return 2; + } + u -= median; + v -= median; + if ((u + v) < median) { + rotated = false; + return 3; + } + return 0; + } +} + +PatchHandle find_patch(int face_index, float u, float v) +{ + if (face_index < min_patch_face || face_index > max_patch_face) { + return bogus_patch_handle(); + } + + QuadNode node = quad_nodes[face_index - min_patch_face]; + + if (!is_set(node.child[0])) { + return bogus_patch_handle(); + } + + float median = 0.5; + bool tri_rotated = false; + + for (int depth = 0; depth <= max_depth; ++depth, median *= 0.5) { + int quadrant = (patches_are_triangular != 0) ? 
+ transformUVToTriQuadrant(median, u, v, tri_rotated) : + transformUVToQuadQuadrant(median, u, v); + + if (is_leaf(node.child[quadrant])) { + return input_patch_handles[get_index(node.child[quadrant])]; + } + + node = quad_nodes[get_index(node.child[quadrant])]; + } +} + +OsdPatchCoord bogus_patch_coord(int face_index, float u, float v) +{ + OsdPatchCoord coord; + coord.arrayIndex = 0; + coord.patchIndex = face_index; + coord.vertIndex = 0; + coord.s = u; + coord.t = v; + return coord; +} + +OsdPatchCoord GetPatchCoord(int face_index, float u, float v) +{ + PatchHandle patch_handle = find_patch(face_index, u, v); + + if (patch_handle.array_index == -1) { + return bogus_patch_coord(face_index, u, v); + } + + OsdPatchCoord coord; + coord.arrayIndex = patch_handle.array_index; + coord.patchIndex = patch_handle.patch_index; + coord.vertIndex = patch_handle.vertex_index; + coord.s = u; + coord.t = v; + return coord; +} + +/* ------------------------------------------------------------------------------ + * Patch evaluation. Note that the 1st and 2nd derivatives are always computed, although we + * only return and use the 1st derivatives if adaptive patches are used. This could + * perhaps be optimized. + */ + +#if defined(FVAR_EVALUATION) +void evaluate_patches_limits(int patch_index, float u, float v, inout vec2 dst) +{ + OsdPatchCoord coord = GetPatchCoord(patch_index, u, v); + OsdPatchArray array = GetPatchArray(coord.arrayIndex); + OsdPatchParam param = GetPatchParam(coord.patchIndex); + + int patchType = OsdPatchParamIsRegular(param) ? 
array.regDesc : array.desc; + + float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20]; + int nPoints = OsdEvaluatePatchBasis( + patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv); + + int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase); + + for (int cv = 0; cv < nPoints; ++cv) { + int index = patchIndexBuffer[indexBase + cv]; + vec2 src_fvar = read_vec2(src_offset + index); + dst += src_fvar * wP[cv]; + } +} +#else +void evaluate_patches_limits( + int patch_index, float u, float v, inout vec3 dst, inout vec3 du, inout vec3 dv) +{ + OsdPatchCoord coord = GetPatchCoord(patch_index, u, v); + OsdPatchArray array = GetPatchArray(coord.arrayIndex); + OsdPatchParam param = GetPatchParam(coord.patchIndex); + + int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc; + + float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20]; + int nPoints = OsdEvaluatePatchBasis( + patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv); + + int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase); + + for (int cv = 0; cv < nPoints; ++cv) { + int index = patchIndexBuffer[indexBase + cv]; + vec3 src_vertex = read_vec3(index); + + dst += src_vertex * wP[cv]; + du += src_vertex * wDu[cv]; + dv += src_vertex * wDv[cv]; + } +} +#endif + +/* ------------------------------------------------------------------------------ + * Entry point. + */ + +#if defined(FVAR_EVALUATION) +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + vec2 fvar = vec2(0.0); + + BlenderPatchCoord patch_co = patch_coords[loop_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, fvar); + output_fvar[dst_offset + loop_index] = fvar; + } +} +#elif defined(FDOTS_EVALUATION) +bool is_face_selected(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0; +} + +bool is_face_active(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_active_mask) != 0; +} + +float get_face_flag(uint coarse_quad_index) +{ + if (is_face_active(coarse_quad_index)) { + return -1.0; + } + + if (is_face_selected(coarse_quad_index)) { + return 1.0; + } + + return 0.0; +} + +void main() +{ + /* We execute for each coarse quad. */ + uint coarse_quad_index = get_global_invocation_index(); + if (coarse_quad_index >= total_dispatch_size) { + return; + } + + BlenderPatchCoord patch_co = patch_coords[coarse_quad_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + vec3 pos = vec3(0.0); + vec3 du = vec3(0.0); + vec3 dv = vec3(0.0); + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv); + vec3 nor = normalize(cross(du, dv)); + + FDotVert vert; + vert.x = pos.x; + vert.y = pos.y; + vert.z = pos.z; + + FDotNor fnor; + fnor.x = nor.x; + fnor.y = nor.y; + fnor.z = nor.z; + fnor.flag = get_face_flag(coarse_quad_index); + + output_verts[coarse_quad_index] = vert; + output_nors[coarse_quad_index] = fnor; + output_indices[coarse_quad_index] = coarse_quad_index; +} +#else +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + vec3 pos = vec3(0.0); + vec3 du = vec3(0.0); + vec3 dv = vec3(0.0); + + BlenderPatchCoord patch_co = patch_coords[loop_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv); + +# if defined(LIMIT_NORMALS) + vec3 nor = normalize(cross(du, dv)); +# else + /* This will be computed later. */ + vec3 nor = vec3(0.0); +# endif + + int origindex = input_vert_origindex[loop_index]; + uint flag = 0; + if (origindex == -1) { + flag = -1; + } + + PosNorLoop vertex_data; + set_vertex_pos(vertex_data, pos); + set_vertex_nor(vertex_data, nor, flag); + output_verts[loop_index] = vertex_data; + } +} +#endif diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl new file mode 100644 index 00000000000..6c76cd41ca4 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl @@ -0,0 +1,97 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer inputEdgeIndex +{ + uint input_edge_index[]; +}; + +layout(std430, binding = 2) writeonly buffer outputEdgeFactors +{ +#ifdef GPU_AMD_DRIVER_BYTE_BUG + float output_edge_fac[]; +#else + uint output_edge_fac[]; +#endif +}; + +void write_vec4(uint index, vec4 edge_facs) +{ +#ifdef GPU_AMD_DRIVER_BYTE_BUG + for (uint i = 0; i < 4; i++) { + output_edge_fac[index + i] = edge_facs[i]; + } +#else + /* Use same scaling as in extract_edge_fac_iter_poly_mesh. 
 uint a = uint(clamp(edge_facs.x * 253.0 + 1.0, 0.0, 255.0)); + uint b = uint(clamp(edge_facs.y * 253.0 + 1.0, 0.0, 255.0)); + uint c = uint(clamp(edge_facs.z * 253.0 + 1.0, 0.0, 255.0)); + uint d = uint(clamp(edge_facs.w * 253.0 + 1.0, 0.0, 255.0)); + uint packed_edge_fac = a << 24 | b << 16 | c << 8 | d; + output_edge_fac[index] = packed_edge_fac; +#endif +} + +/* From extract_mesh_vbo_edge_fac.cc, keep in sync! */ +float loop_edge_factor_get(vec3 f_no, vec3 v_co, vec3 v_no, vec3 v_next_co) +{ + vec3 evec = v_next_co - v_co; + vec3 enor = normalize(cross(v_no, evec)); + float d = abs(dot(enor, f_no)); + /* Re-scale to the slider range. */ + d *= (1.0 / 0.065); + return clamp(d, 0.0, 1.0); +} + +float compute_line_factor(uint start_loop_index, uint corner_index, vec3 face_normal) +{ + uint vertex_index = start_loop_index + corner_index; + uint edge_index = input_edge_index[vertex_index]; + + if (edge_index == -1 && optimal_display) { + return 0.0; + } + + /* Mod 4 so we loop back at the first vertex on the last loop index (3), but only the corner + * index needs to be wrapped. */ + uint next_vertex_index = start_loop_index + (corner_index + 1) % 4; + vec3 vertex_pos = get_vertex_pos(pos_nor[vertex_index]); + vec3 vertex_nor = get_vertex_nor(pos_nor[vertex_index]); + vec3 next_vertex_pos = get_vertex_pos(pos_nor[next_vertex_index]); + return loop_edge_factor_get(face_normal, vertex_pos, vertex_nor, next_vertex_pos); +} + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. */ + uint start_loop_index = quad_index * 4; + + /* First compute the face normal, we need it to compute the dihedral edge angle. 
*/ + vec3 v0 = get_vertex_pos(pos_nor[start_loop_index + 0]); + vec3 v1 = get_vertex_pos(pos_nor[start_loop_index + 1]); + vec3 v2 = get_vertex_pos(pos_nor[start_loop_index + 2]); + vec3 face_normal = normalize(cross(v1 - v0, v2 - v0)); + + vec4 edge_facs = vec4(0.0); + for (int i = 0; i < 4; i++) { + edge_facs[i] = compute_line_factor(start_loop_index, i, face_normal); + } + +#ifdef GPU_AMD_DRIVER_BYTE_BUG + write_vec4(start_loop_index, edge_facs); +#else + /* When packed into bytes, the index is the same as for the quad. */ + write_vec4(quad_index, edge_facs); +#endif +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl new file mode 100644 index 00000000000..ea73b9482d3 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl @@ -0,0 +1,80 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVerts +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer inputUVs +{ + vec2 uvs[]; +}; + +/* Mirror of #UVStretchAngle in the C++ code, but using floats until proper data compression + * is implemented for all subdivision data. */ +struct UVStretchAngle { + float angle; + float uv_angle0; + float uv_angle1; +}; + +layout(std430, binding = 2) writeonly buffer outputStretchAngles +{ + UVStretchAngle uv_stretches[]; +}; + +#define M_PI 3.1415926535897932 +#define M_1_PI 0.31830988618379067154 + +/* Adapted from BLI_math_vector.h */ +float angle_normalized_v3v3(vec3 v1, vec3 v2) +{ + /* this is the same as acos(dot_v3v3(v1, v2)), but more accurate */ + bool q = (dot(v1, v2) >= 0.0); + vec3 v = (q) ? (v1 - v2) : (v1 + v2); + float a = 2.0 * asin(length(v) / 2.0); + return (q) ? a : M_PI - a; +} + +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint i = 0; i < 4; i++) { + uint cur_loop_index = start_loop_index + i; + uint next_loop_index = start_loop_index + (i + 1) % 4; + uint prev_loop_index = start_loop_index + (i + 3) % 4; + + /* Compute 2d edge vectors from UVs. */ + vec2 cur_uv = uvs[src_offset + cur_loop_index]; + vec2 next_uv = uvs[src_offset + next_loop_index]; + vec2 prev_uv = uvs[src_offset + prev_loop_index]; + + vec2 norm_uv_edge0 = normalize(prev_uv - cur_uv); + vec2 norm_uv_edge1 = normalize(cur_uv - next_uv); + + /* Compute 3d edge vectors from positions. */ + vec3 cur_pos = get_vertex_pos(pos_nor[cur_loop_index]); + vec3 next_pos = get_vertex_pos(pos_nor[next_loop_index]); + vec3 prev_pos = get_vertex_pos(pos_nor[prev_loop_index]); + + vec3 norm_pos_edge0 = normalize(prev_pos - cur_pos); + vec3 norm_pos_edge1 = normalize(cur_pos - next_pos); + + /* Compute stretches, this logic is adapted from #edituv_get_edituv_stretch_angle. + * Keep in sync! 
*/ + UVStretchAngle stretch; + stretch.uv_angle0 = atan(norm_uv_edge0.y, norm_uv_edge0.x) * M_1_PI; + stretch.uv_angle1 = atan(norm_uv_edge1.y, norm_uv_edge1.x) * M_1_PI; + stretch.angle = angle_normalized_v3v3(norm_pos_edge0, norm_pos_edge1) * M_1_PI; + + uv_stretches[cur_loop_index] = stretch; + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl new file mode 100644 index 00000000000..e897fb3f3c0 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl @@ -0,0 +1,31 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly buffer inputCoarseData +{ + float coarse_stretch_area[]; +}; + +layout(std430, binding = 2) writeonly buffer outputSubdivData +{ + float subdiv_stretch_area[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. 
*/ + uint start_loop_index = quad_index * 4; + + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + + for (int i = 0; i < 4; i++) { + subdiv_stretch_area[start_loop_index + i] = coarse_stretch_area[coarse_quad_index]; + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl new file mode 100644 index 00000000000..41a8df3cf82 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl @@ -0,0 +1,52 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 2) readonly buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; + +layout(std430, binding = 3) writeonly buffer outputLoopNormals +{ + vec3 output_lnor[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. */ + uint start_loop_index = quad_index * 4; + + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + + if ((extra_coarse_face_data[coarse_quad_index] & coarse_face_smooth_mask) != 0) { + /* Face is smooth, use vertex normals. */ + for (int i = 0; i < 4; i++) { + PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i]; + output_lnor[start_loop_index + i] = get_vertex_nor(pos_nor_loop); + } + } + else { + /* Face is flat shaded, compute flat face normal from an inscribed triangle. 
*/ + vec3 verts[3]; + for (int i = 0; i < 3; i++) { + verts[i] = get_vertex_pos(pos_nor[start_loop_index + i]); + } + + vec3 face_normal = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + for (int i = 0; i < 4; i++) { + output_lnor[start_loop_index + i] = face_normal; + } + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl new file mode 100644 index 00000000000..7182ce57ad3 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl @@ -0,0 +1,47 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +struct SculptData { + uint face_set_color; + float mask; +}; + +layout(std430, binding = 0) readonly restrict buffer sculptMask +{ + float sculpt_mask[]; +}; + +layout(std430, binding = 1) readonly restrict buffer faceSetColor +{ + uint face_set_color[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer sculptData +{ + SculptData sculpt_data[]; +}; + +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + SculptData data; + data.face_set_color = face_set_color[loop_index]; + + if (has_sculpt_mask) { + data.mask = sculpt_mask[loop_index]; + } + else { + data.mask = 0.0; + } + + sculpt_data[loop_index] = data; + } +} diff --git a/source/blender/editors/space_view3d/view3d_draw.c b/source/blender/editors/space_view3d/view3d_draw.c index a7d170982ed..b1f19581543 100644 --- a/source/blender/editors/space_view3d/view3d_draw.c +++ b/source/blender/editors/space_view3d/view3d_draw.c @@ -1581,6 +1581,7 @@ void view3d_main_region_draw(const bContext *C, ARegion *region) view3d_draw_view(C, region); + DRW_cache_free_old_subdiv(); DRW_cache_free_old_batches(bmain); BKE_image_free_old_gputextures(bmain); GPU_pass_cache_garbage_collect(); diff --git a/source/blender/editors/transform/transform_snap_object.c b/source/blender/editors/transform/transform_snap_object.c index 350d3a2676c..e3a2d1f6531 100644 --- a/source/blender/editors/transform/transform_snap_object.c +++ b/source/blender/editors/transform/transform_snap_object.c @@ -146,7 +146,7 @@ struct SnapObjectContext { * If NULL the BMesh should be used. 
*/ static Mesh *mesh_for_snap(Object *ob_eval, eSnapEditType edit_mode_type, bool *r_use_hide) { - Mesh *me_eval = ob_eval->data; + Mesh *me_eval = BKE_object_get_evaluated_mesh(ob_eval); bool use_hide = false; if (BKE_object_is_in_editmode(ob_eval)) { if (edit_mode_type == SNAP_GEOM_EDIT) { diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index 5189fa1ae41..5e67441be27 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -40,6 +40,8 @@ typedef enum eGPUBackendType { void GPU_backend_init(eGPUBackendType backend); void GPU_backend_exit(void); +eGPUBackendType GPU_backend_get_type(void); + /** Opaque type hiding blender::gpu::Context. */ typedef struct GPUContext GPUContext; diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h index e4f1709173e..0f83e590597 100644 --- a/source/blender/gpu/GPU_index_buffer.h +++ b/source/blender/gpu/GPU_index_buffer.h @@ -53,6 +53,8 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *, GPUPrimType, uint index_len, uin void GPU_indexbuf_init(GPUIndexBufBuilder *, GPUPrimType, uint prim_len, uint vertex_len); GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len); +void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len); + /* * Thread safe. * @@ -82,6 +84,16 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *); void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding); +/* Upload data to the GPU (if not built on the device) and bind the buffer to its default target. + */ +void GPU_indexbuf_use(GPUIndexBuf *elem); + +/* Partially update the GPUIndexBuf which was already sent to the device, or built directly on the + * device. The data needs to be compatible with potential compression applied to the original + * indices when the index buffer was built, i.e., if the data was compressed to use shorts instead + * of ints, shorts should passed here. 
*/ +void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, uint len, const void *data); + /* Create a sub-range of an existing index-buffer. */ GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *elem_src, uint start, uint length); void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index 62a495abfb3..43a8e7fc4cb 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -91,6 +91,8 @@ void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts); void GPU_vertbuf_init_with_format_ex(GPUVertBuf *, const GPUVertFormat *, GPUUsageType); +void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len); + #define GPU_vertbuf_init_with_format(verts, format) \ GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_STATIC) @@ -172,6 +174,7 @@ const GPUVertFormat *GPU_vertbuf_get_format(const GPUVertBuf *verts); uint GPU_vertbuf_get_vertex_alloc(const GPUVertBuf *verts); uint GPU_vertbuf_get_vertex_len(const GPUVertBuf *verts); GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts); +void GPU_vertbuf_tag_dirty(GPUVertBuf *verts); /** * Should be rename to #GPU_vertbuf_data_upload. @@ -179,12 +182,14 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts); void GPU_vertbuf_use(GPUVertBuf *); void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding); +void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle); + /** * XXX: do not use! * This is just a wrapper for the use of the Hair refine workaround. * To be used with #GPU_vertbuf_use(). 
*/ -void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data); +void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data); /* Metrics */ uint GPU_vertbuf_get_memory_usage(void); diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index 5af15d1bc3d..98714269402 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -186,6 +186,15 @@ void GPU_backend_exit() g_backend = nullptr; } +eGPUBackendType GPU_backend_get_type() +{ + if (g_backend && dynamic_cast<GLBackend *>(g_backend) != nullptr) { + return GPU_BACKEND_OPENGL; + } + + return GPU_BACKEND_NONE; +} + GPUBackend *GPUBackend::get() { return g_backend; diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc index 3472cc24a74..895b2a8461b 100644 --- a/source/blender/gpu/intern/gpu_index_buffer.cc +++ b/source/blender/gpu/intern/gpu_index_buffer.cc @@ -74,11 +74,16 @@ void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len) { GPUIndexBuf *elem_ = GPU_indexbuf_calloc(); - IndexBuf *elem = unwrap(elem_); - elem->init_build_on_device(index_len); + GPU_indexbuf_init_build_on_device(elem_, index_len); return elem_; } +void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len) +{ + IndexBuf *elem_ = unwrap(elem); + elem_->init_build_on_device(index_len); +} + void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from) { BLI_assert(builder_to->data == builder_from->data); @@ -410,9 +415,19 @@ int GPU_indexbuf_primitive_len(GPUPrimType prim_type) return indices_per_primitive(prim_type); } +void GPU_indexbuf_use(GPUIndexBuf *elem) +{ + unwrap(elem)->upload_data(); +} + void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding) { unwrap(elem)->bind_as_ssbo(binding); } +void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, 
uint len, const void *data) +{ + unwrap(elem)->update_sub(start, len, data); +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh index ed7dd830c8c..adc0145f867 100644 --- a/source/blender/gpu/intern/gpu_index_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_index_buffer_private.hh @@ -92,11 +92,15 @@ class IndexBuf { return is_init_; }; + virtual void upload_data(void) = 0; + virtual void bind_as_ssbo(uint binding) = 0; virtual const uint32_t *read() const = 0; uint32_t *unmap(const uint32_t *mapped_memory) const; + virtual void update_sub(uint start, uint len, const void *data) = 0; + private: inline void squeeze_indices_short(uint min_idx, uint max_idx); inline uint index_range(uint *r_min, uint *r_max); diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 5ed9648387f..dba31f501f2 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -144,6 +144,12 @@ void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts_, unwrap(verts_)->init(format, usage); } +void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len) +{ + GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_DEVICE_ONLY); + GPU_vertbuf_data_alloc(verts, v_len); +} + GPUVertBuf *GPU_vertbuf_duplicate(GPUVertBuf *verts_) { return wrap(unwrap(verts_)->duplicate()); @@ -313,6 +319,11 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts) return unwrap(verts)->flag; } +void GPU_vertbuf_tag_dirty(GPUVertBuf *verts) +{ + unwrap(verts)->flag |= GPU_VERTBUF_DATA_DIRTY; +} + uint GPU_vertbuf_get_memory_usage() { return VertBuf::memory_usage; @@ -323,12 +334,17 @@ void GPU_vertbuf_use(GPUVertBuf *verts) unwrap(verts)->upload(); } +void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle) +{ + unwrap(verts)->wrap_handle(handle); +} + void 
GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding) { unwrap(verts)->bind_as_ssbo(binding); } -void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data) +void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data) { unwrap(verts)->update_sub(start, len, data); } diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 9531c2c1a5f..2f46295f45a 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -68,6 +68,8 @@ class VertBuf { void upload(void); virtual void bind_as_ssbo(uint binding) = 0; + virtual void wrap_handle(uint64_t handle) = 0; + VertBuf *duplicate(void); /* Size of the data allocated. */ @@ -96,7 +98,7 @@ class VertBuf { } } - virtual void update_sub(uint start, uint len, void *data) = 0; + virtual void update_sub(uint start, uint len, const void *data) = 0; virtual const void *read() const = 0; virtual void *unmap(const void *mapped_data) const = 0; diff --git a/source/blender/gpu/opengl/gl_index_buffer.cc b/source/blender/gpu/opengl/gl_index_buffer.cc index e305f765ad9..82bab460ae3 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.cc +++ b/source/blender/gpu/opengl/gl_index_buffer.cc @@ -81,4 +81,14 @@ bool GLIndexBuf::is_active() const return ibo_id_ == active_ibo_id; } +void GLIndexBuf::upload_data() +{ + bind(); +} + +void GLIndexBuf::update_sub(uint start, uint len, const void *data) +{ + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, start, len, data); +} + } // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_index_buffer.hh b/source/blender/gpu/opengl/gl_index_buffer.hh index 0dbdaa6d398..85d52447bc6 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.hh +++ b/source/blender/gpu/opengl/gl_index_buffer.hh @@ -61,6 +61,10 @@ class GLIndexBuf : public IndexBuf { return (index_type_ == GPU_INDEX_U16) ? 
0xFFFFu : 0xFFFFFFFFu; } + void upload_data(void) override; + + void update_sub(uint start, uint len, const void *data) override; + private: bool is_active() const; diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.cc b/source/blender/gpu/opengl/gl_vertex_buffer.cc index ce16a491528..469ac2cf8d6 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.cc +++ b/source/blender/gpu/opengl/gl_vertex_buffer.cc @@ -49,6 +49,10 @@ void GLVertBuf::resize_data() void GLVertBuf::release_data() { + if (is_wrapper_) { + return; + } + if (vbo_id_ != 0) { GLContext::buf_free(vbo_id_); vbo_id_ = 0; @@ -137,6 +141,16 @@ void *GLVertBuf::unmap(const void *mapped_data) const return result; } +void GLVertBuf::wrap_handle(uint64_t handle) +{ + BLI_assert(vbo_id_ == 0); + BLI_assert(glIsBuffer(static_cast<uint>(handle))); + is_wrapper_ = true; + vbo_id_ = static_cast<uint>(handle); + /* We assume the data is already on the device, so no need to allocate or send it. */ + flag = GPU_VERTBUF_DATA_UPLOADED; +} + bool GLVertBuf::is_active() const { if (!vbo_id_) { @@ -147,7 +161,7 @@ bool GLVertBuf::is_active() const return vbo_id_ == active_vbo_id; } -void GLVertBuf::update_sub(uint start, uint len, void *data) +void GLVertBuf::update_sub(uint start, uint len, const void *data) { glBufferSubData(GL_ARRAY_BUFFER, start, len, data); } diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.hh b/source/blender/gpu/opengl/gl_vertex_buffer.hh index 6c38a2225b3..27e4cc4f8e2 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.hh +++ b/source/blender/gpu/opengl/gl_vertex_buffer.hh @@ -39,17 +39,22 @@ class GLVertBuf : public VertBuf { private: /** OpenGL buffer handle. Init on first upload. Immutable after that. */ GLuint vbo_id_ = 0; + /** Defines whether the buffer handle is wrapped by this GLVertBuf, i.e. we do not own it and + * should not free it. */ + bool is_wrapper_ = false; /** Size on the GPU. 
*/ size_t vbo_size_ = 0; public: void bind(void); - void update_sub(uint start, uint len, void *data) override; + void update_sub(uint start, uint len, const void *data) override; const void *read() const override; void *unmap(const void *mapped_data) const override; + void wrap_handle(uint64_t handle) override; + protected: void acquire_data(void) override; void resize_data(void) override; diff --git a/source/blender/makesdna/DNA_mesh_types.h b/source/blender/makesdna/DNA_mesh_types.h index c053baf9f7e..94e88bdaca6 100644 --- a/source/blender/makesdna/DNA_mesh_types.h +++ b/source/blender/makesdna/DNA_mesh_types.h @@ -138,6 +138,15 @@ typedef struct Mesh_Runtime { int64_t cd_dirty_loop; int64_t cd_dirty_poly; + /** + * Settings for lazily evaluating the subdivision on the CPU if needed. These are + * set in the modifier when GPU subdivision can be performed. + */ + char subsurf_apply_render; + char subsurf_use_optimal_display; + char _pad[2]; + int subsurf_resolution; + } Mesh_Runtime; typedef struct Mesh { @@ -356,7 +365,8 @@ typedef enum eMeshWrapperType { ME_WRAPPER_TYPE_MDATA = 0, /** Use edit-mesh data (#Mesh.edit_mesh, #Mesh_Runtime.edit_data). */ ME_WRAPPER_TYPE_BMESH = 1, - /* ME_WRAPPER_TYPE_SUBD = 2, */ /* TODO */ + /** Use subdivision mesh data (#Mesh_Runtime.mesh_eval). */ + ME_WRAPPER_TYPE_SUBD = 2, } eMeshWrapperType; /** #Mesh.texflag */ diff --git a/source/blender/makesdna/DNA_modifier_types.h b/source/blender/makesdna/DNA_modifier_types.h index 85cc1361adf..fc041e257b0 100644 --- a/source/blender/makesdna/DNA_modifier_types.h +++ b/source/blender/makesdna/DNA_modifier_types.h @@ -196,6 +196,13 @@ typedef enum { SUBSURF_BOUNDARY_SMOOTH_PRESERVE_CORNERS = 1, } eSubsurfBoundarySmooth; +typedef struct SubsurfRuntimeData { + /* Cached subdivision surface descriptor, with topology and settings. 
*/ + struct Subdiv *subdiv; + char set_by_draw_code; + char _pad[7]; +} SubsurfRuntimeData; + typedef struct SubsurfModifierData { ModifierData modifier; diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index c99651f0717..34415308ef6 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -1145,6 +1145,7 @@ typedef enum eUserpref_GPU_Flag { USER_GPU_FLAG_NO_DEPT_PICK = (1 << 0), USER_GPU_FLAG_NO_EDIT_MODE_SMOOTH_WIRE = (1 << 1), USER_GPU_FLAG_OVERLAY_SMOOTH_WIRE = (1 << 2), + USER_GPU_FLAG_SUBDIVISION_EVALUATION = (1 << 3), } eUserpref_GPU_Flag; /** #UserDef.tablet_api */ diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c index 929cf94615b..71c38311124 100644 --- a/source/blender/makesrna/intern/rna_userdef.c +++ b/source/blender/makesrna/intern/rna_userdef.c @@ -182,6 +182,7 @@ static const EnumPropertyItem rna_enum_userdef_viewport_aa_items[] = { # include "BKE_image.h" # include "BKE_main.h" # include "BKE_mesh_runtime.h" +# include "BKE_object.h" # include "BKE_paint.h" # include "BKE_pbvh.h" # include "BKE_preferences.h" @@ -578,6 +579,20 @@ static PointerRNA rna_UserDef_apps_get(PointerRNA *ptr) return rna_pointer_inherit_refine(ptr, &RNA_PreferencesApps, ptr->data); } +/* Reevaluate objects with a subsurf modifier as the last in their modifiers stacks. 
*/ +static void rna_UserDef_subdivision_update(Main *bmain, Scene *scene, PointerRNA *ptr) +{ + Object *ob; + + for (ob = bmain->objects.first; ob; ob = ob->id.next) { + if (BKE_object_get_last_subsurf_modifier(ob) != NULL) { + DEG_id_tag_update(&ob->id, ID_RECALC_GEOMETRY); + } + } + + rna_userdef_update(bmain, scene, ptr); +} + static void rna_UserDef_audio_update(Main *bmain, Scene *UNUSED(scene), PointerRNA *UNUSED(ptr)) { BKE_sound_init(bmain); @@ -5651,6 +5666,16 @@ static void rna_def_userdef_system(BlenderRNA *brna) "Use the depth buffer for picking 3D View selection " "(without this the front most object may not be selected first)"); + /* GPU subdivision evaluation. */ + + prop = RNA_def_property(srna, "use_gpu_subdivision", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "gpu_flag", USER_GPU_FLAG_SUBDIVISION_EVALUATION); + RNA_def_property_ui_text(prop, + "GPU Subdivision", + "Enable GPU acceleration for evaluating the last subdivision surface " + "modifiers in the stack"); + RNA_def_property_update(prop, 0, "rna_UserDef_subdivision_update"); + /* Audio */ prop = RNA_def_property(srna, "audio_mixing_buffer", PROP_ENUM, PROP_NONE); diff --git a/source/blender/modifiers/intern/MOD_subsurf.c b/source/blender/modifiers/intern/MOD_subsurf.c index 7470f2abb15..00870d076ef 100644 --- a/source/blender/modifiers/intern/MOD_subsurf.c +++ b/source/blender/modifiers/intern/MOD_subsurf.c @@ -39,6 +39,7 @@ #include "DNA_screen_types.h" #include "BKE_context.h" +#include "BKE_editmesh.h" #include "BKE_mesh.h" #include "BKE_scene.h" #include "BKE_screen.h" @@ -46,6 +47,7 @@ #include "BKE_subdiv_ccg.h" #include "BKE_subdiv_deform.h" #include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" #include "BKE_subsurf.h" #include "UI_interface.h" @@ -65,11 +67,6 @@ #include "intern/CCGSubSurf.h" -typedef struct SubsurfRuntimeData { - /* Cached subdivision surface descriptor, with topology and settings. 
*/ - struct Subdiv *subdiv; -} SubsurfRuntimeData; - static void initData(ModifierData *md) { SubsurfModifierData *smd = (SubsurfModifierData *)md; @@ -155,37 +152,6 @@ static int subdiv_levels_for_modifier_get(const SubsurfModifierData *smd, return get_render_subsurf_level(&scene->r, requested_levels, use_render_params); } -static void subdiv_settings_init(SubdivSettings *settings, - const SubsurfModifierData *smd, - const ModifierEvalContext *ctx) -{ - const bool use_render_params = (ctx->flag & MOD_APPLY_RENDER); - const int requested_levels = (use_render_params) ? smd->renderLevels : smd->levels; - - settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE); - settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision); - settings->level = settings->is_simple ? - 1 : - (settings->is_adaptive ? smd->quality : requested_levels); - settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease); - settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf( - smd->boundary_smooth); - settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth( - smd->uv_smooth); -} - -/* Main goal of this function is to give usable subdivision surface descriptor - * which matches settings and topology. */ -static Subdiv *subdiv_descriptor_ensure(SubsurfModifierData *smd, - const SubdivSettings *subdiv_settings, - const Mesh *mesh) -{ - SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; - Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh); - runtime_data->subdiv = subdiv; - return subdiv; -} - /* Subdivide into fully qualified mesh. */ static void subdiv_mesh_settings_init(SubdivToMeshSettings *settings, @@ -240,14 +206,17 @@ static Mesh *subdiv_as_ccg(SubsurfModifierData *smd, return result; } -static SubsurfRuntimeData *subsurf_ensure_runtime(SubsurfModifierData *smd) +/* Cache settings for lazy CPU evaluation. 
*/ + +static void subdiv_cache_cpu_evaluation_settings(const ModifierEvalContext *ctx, + Mesh *me, + SubsurfModifierData *smd) { - SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; - if (runtime_data == NULL) { - runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime"); - smd->modifier.runtime = runtime_data; - } - return runtime_data; + SubdivToMeshSettings mesh_settings; + subdiv_mesh_settings_init(&mesh_settings, smd, ctx); + me->runtime.subsurf_apply_render = (ctx->flag & MOD_APPLY_RENDER) != 0; + me->runtime.subsurf_resolution = mesh_settings.resolution; + me->runtime.subsurf_use_optimal_display = mesh_settings.use_optimal_display; } /* Modifier itself. */ @@ -261,12 +230,30 @@ static Mesh *modifyMesh(ModifierData *md, const ModifierEvalContext *ctx, Mesh * #endif SubsurfModifierData *smd = (SubsurfModifierData *)md; SubdivSettings subdiv_settings; - subdiv_settings_init(&subdiv_settings, smd, ctx); + BKE_subsurf_modifier_subdiv_settings_init( + &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0); if (subdiv_settings.level == 0) { return result; } - SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd); - Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh); + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + + /* Delay evaluation to the draw code if possible, provided we do not have to apply the modifier. + */ + if ((ctx->flag & MOD_APPLY_TO_BASE_MESH) == 0) { + Scene *scene = DEG_get_evaluated_scene(ctx->depsgraph); + const bool is_render_mode = (ctx->flag & MOD_APPLY_RENDER) != 0; + /* Same check as in `DRW_mesh_batch_cache_create_requested` to keep both code coherent. 
*/ + const bool is_editmode = (mesh->edit_mesh != NULL) && + (mesh->edit_mesh->mesh_eval_final != NULL); + const int required_mode = BKE_subsurf_modifier_eval_required_mode(is_render_mode, is_editmode); + if (BKE_subsurf_modifier_can_do_gpu_subdiv_ex(scene, ctx->object, smd, required_mode, false)) { + subdiv_cache_cpu_evaluation_settings(ctx, mesh, smd); + return result; + } + } + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure( + smd, &subdiv_settings, mesh, false); if (subdiv == NULL) { /* Happens on bad topology, but also on empty input mesh. */ return result; @@ -320,12 +307,14 @@ static void deformMatrices(ModifierData *md, SubsurfModifierData *smd = (SubsurfModifierData *)md; SubdivSettings subdiv_settings; - subdiv_settings_init(&subdiv_settings, smd, ctx); + BKE_subsurf_modifier_subdiv_settings_init( + &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0); if (subdiv_settings.level == 0) { return; } - SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd); - Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh); + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure( + smd, &subdiv_settings, mesh, false); if (subdiv == NULL) { /* Happens on bad topology, but also on empty input mesh. */ return; diff --git a/source/blender/windowmanager/intern/wm_init_exit.c b/source/blender/windowmanager/intern/wm_init_exit.c index 2f87e5789fe..957ec7d800d 100644 --- a/source/blender/windowmanager/intern/wm_init_exit.c +++ b/source/blender/windowmanager/intern/wm_init_exit.c @@ -562,6 +562,13 @@ void WM_exit_ex(bContext *C, const bool do_python) BKE_blender_free(); /* blender.c, does entire library and spacetypes */ // BKE_material_copybuf_free(); + + /* Free the GPU subdivision data after the database to ensure that subdivision structs used by + * the modifiers were garbage collected. 
*/ + if (opengl_is_init) { + DRW_subdiv_free(); + } + ANIM_fcurves_copybuf_free(); ANIM_drivers_copybuf_free(); ANIM_driver_vars_copybuf_free(); |