
git.blender.org/blender.git
author    Kévin Dietrich <kevin.dietrich@mailoo.org>  2021-12-27 18:34:47 +0300
committer Kévin Dietrich <kevin.dietrich@mailoo.org>  2021-12-27 18:35:54 +0300
commit    eed45d2a239a2a18a2420ba15dfb55e0f8dc5630 (patch)
tree      aa55ce966caa8e28db4853d7d755003ed249805b /source/blender
parent    31e120ef4997583332aa9b5af93521e7e666e9f3 (diff)
OpenSubDiv: add support for an OpenGL evaluator
This evaluator is used to evaluate subdivision at render time, allowing for faster renders of meshes with a subdivision surface modifier placed at the last position in the modifier list.

When evaluating the subsurf modifier, we detect whether we can delegate evaluation to the draw code. If so, the subdivision is first evaluated on the GPU using our own custom evaluator (only the coarse data needs to be initially sent to the GPU), then buffers for the final `MeshBufferCache` are filled on the GPU using a set of compute shaders. However, some buffers are still filled on the CPU side if doing so on the GPU is impractical (e.g. the line adjacency buffer used for x-ray, whose logic is hardly GPU compatible). This is done at the mesh buffer extraction level so that the result can be readily used in the various OpenGL engines, without having to write custom geometry or tessellation shaders.

We use our own subdivision evaluation shaders, instead of OpenSubDiv's vanilla ones, in order to control the data layout and interpolation. For example, we store vertex colors as compressed 16-bit integers, while OpenSubDiv's default evaluator only works with float types.

In order to still access the modified geometry on the CPU side, for use in modifiers or transform operators, a dedicated wrapper type, `ME_WRAPPER_TYPE_SUBD`, is added. Subdivision will be lazily evaluated via `BKE_object_get_evaluated_mesh`, which will create such a wrapper if possible. If the final subdivision surface is not needed on the CPU side, `BKE_object_get_evaluated_mesh_no_subsurf` should be used instead.

Enabling or disabling GPU subdivision can be done through the user preferences (under Viewport -> Subdivision).

See the patch description for benchmarks.

Reviewed By: campbellbarton, jbakker, fclem, brecht, #eevee_viewport

Differential Revision: https://developer.blender.org/D12406
Diffstat (limited to 'source/blender')
-rw-r--r--source/blender/blenkernel/BKE_mesh_wrapper.h3
-rw-r--r--source/blender/blenkernel/BKE_object.h11
-rw-r--r--source/blender/blenkernel/BKE_subdiv.h14
-rw-r--r--source/blender/blenkernel/BKE_subdiv_eval.h14
-rw-r--r--source/blender/blenkernel/BKE_subdiv_foreach.h3
-rw-r--r--source/blender/blenkernel/BKE_subdiv_modifier.h71
-rw-r--r--source/blender/blenkernel/CMakeLists.txt2
-rw-r--r--source/blender/blenkernel/intern/mesh_normals.cc1
-rw-r--r--source/blender/blenkernel/intern/mesh_wrapper.c85
-rw-r--r--source/blender/blenkernel/intern/modifier.c1
-rw-r--r--source/blender/blenkernel/intern/multires_reshape_smooth.c5
-rw-r--r--source/blender/blenkernel/intern/multires_reshape_util.c2
-rw-r--r--source/blender/blenkernel/intern/multires_reshape_vertcos.c3
-rw-r--r--source/blender/blenkernel/intern/multires_versioning.c2
-rw-r--r--source/blender/blenkernel/intern/object.cc36
-rw-r--r--source/blender/blenkernel/intern/subdiv.c12
-rw-r--r--source/blender/blenkernel/intern/subdiv_ccg.c3
-rw-r--r--source/blender/blenkernel/intern/subdiv_deform.c6
-rw-r--r--source/blender/blenkernel/intern/subdiv_eval.c112
-rw-r--r--source/blender/blenkernel/intern/subdiv_foreach.c3
-rw-r--r--source/blender/blenkernel/intern/subdiv_mesh.c6
-rw-r--r--source/blender/blenkernel/intern/subdiv_modifier.c162
-rw-r--r--source/blender/draw/CMakeLists.txt16
-rw-r--r--source/blender/draw/DRW_engine.h4
-rw-r--r--source/blender/draw/engines/overlay/overlay_armature.c2
-rw-r--r--source/blender/draw/intern/draw_cache.c24
-rw-r--r--source/blender/draw/intern/draw_cache_extract.h24
-rw-r--r--source/blender/draw/intern/draw_cache_extract_mesh.cc102
-rw-r--r--source/blender/draw/intern/draw_cache_impl_mesh.c30
-rw-r--r--source/blender/draw/intern/draw_cache_impl_subdivision.cc1932
-rw-r--r--source/blender/draw/intern/draw_manager.c4
-rw-r--r--source/blender/draw/intern/draw_subdivision.h231
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh.h24
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc156
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc31
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc72
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc72
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc30
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc93
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc83
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc97
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc67
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc59
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc65
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc48
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc27
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc122
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc109
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc97
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc89
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc111
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc48
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl230
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl57
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl43
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_lib.glsl176
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl56
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl34
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl416
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl97
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl80
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl31
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl52
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl47
-rw-r--r--source/blender/editors/space_view3d/view3d_draw.c1
-rw-r--r--source/blender/editors/transform/transform_snap_object.c2
-rw-r--r--source/blender/gpu/GPU_context.h2
-rw-r--r--source/blender/gpu/GPU_index_buffer.h12
-rw-r--r--source/blender/gpu/GPU_vertex_buffer.h7
-rw-r--r--source/blender/gpu/intern/gpu_context.cc9
-rw-r--r--source/blender/gpu/intern/gpu_index_buffer.cc19
-rw-r--r--source/blender/gpu/intern/gpu_index_buffer_private.hh4
-rw-r--r--source/blender/gpu/intern/gpu_vertex_buffer.cc18
-rw-r--r--source/blender/gpu/intern/gpu_vertex_buffer_private.hh4
-rw-r--r--source/blender/gpu/opengl/gl_index_buffer.cc10
-rw-r--r--source/blender/gpu/opengl/gl_index_buffer.hh4
-rw-r--r--source/blender/gpu/opengl/gl_vertex_buffer.cc16
-rw-r--r--source/blender/gpu/opengl/gl_vertex_buffer.hh7
-rw-r--r--source/blender/makesdna/DNA_mesh_types.h12
-rw-r--r--source/blender/makesdna/DNA_modifier_types.h7
-rw-r--r--source/blender/makesdna/DNA_userdef_types.h1
-rw-r--r--source/blender/makesrna/intern/rna_userdef.c25
-rw-r--r--source/blender/modifiers/intern/MOD_subsurf.c87
-rw-r--r--source/blender/windowmanager/intern/wm_init_exit.c7
84 files changed, 5799 insertions, 200 deletions
diff --git a/source/blender/blenkernel/BKE_mesh_wrapper.h b/source/blender/blenkernel/BKE_mesh_wrapper.h
index 2fe264fd0f7..12e8fd71503 100644
--- a/source/blender/blenkernel/BKE_mesh_wrapper.h
+++ b/source/blender/blenkernel/BKE_mesh_wrapper.h
@@ -22,6 +22,7 @@
struct BMEditMesh;
struct CustomData_MeshMasks;
struct Mesh;
+struct Object;
#ifdef __cplusplus
extern "C" {
@@ -51,6 +52,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const struct Mesh *me,
int vert_coords_len,
const float mat[4][4]);
+struct Mesh *BKE_mesh_wrapper_ensure_subdivision(const struct Object *ob, struct Mesh *me);
+
#ifdef __cplusplus
}
#endif
diff --git a/source/blender/blenkernel/BKE_object.h b/source/blender/blenkernel/BKE_object.h
index 03565bd3bda..a7d39598e54 100644
--- a/source/blender/blenkernel/BKE_object.h
+++ b/source/blender/blenkernel/BKE_object.h
@@ -48,6 +48,7 @@ struct RegionView3D;
struct RigidBodyWorld;
struct Scene;
struct ShaderFxData;
+struct SubsurfModifierData;
struct View3D;
struct ViewLayer;
@@ -512,6 +513,7 @@ bool BKE_object_obdata_texspace_get(struct Object *ob,
float **r_loc,
float **r_size);
+struct Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const struct Object *object);
/** Get evaluated mesh for given object. */
struct Mesh *BKE_object_get_evaluated_mesh(const struct Object *object);
/**
@@ -712,6 +714,15 @@ void BKE_object_modifiers_lib_link_common(void *userData,
struct ID **idpoin,
int cb_flag);
+/**
+ * Return the last subsurf modifier of an object; this does not check whether modifiers on top of
+ * it are disabled. Returns NULL if no such modifier is found.
+ *
+ * This does not check if the modifier is enabled as it is assumed that the caller verified that it
+ * is enabled for its evaluation mode.
+ */
+struct SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const struct Object *ob);
+
void BKE_object_replace_data_on_shallow_copy(struct Object *ob, struct ID *new_data);
struct PartEff;
diff --git a/source/blender/blenkernel/BKE_subdiv.h b/source/blender/blenkernel/BKE_subdiv.h
index 2fb27fad30d..169a4337f6a 100644
--- a/source/blender/blenkernel/BKE_subdiv.h
+++ b/source/blender/blenkernel/BKE_subdiv.h
@@ -188,7 +188,16 @@ typedef struct Subdiv {
/* Cached values, are not supposed to be accessed directly. */
struct {
/* Indexed by base face index, element indicates total number of ptex
- * faces created for preceding base faces. */
+ * faces created for preceding base faces. This also stores the final
+ * ptex offset (the total number of PTex faces) at the end of the array
+ * so that algorithms can compute the number of ptex faces for a given
+ * face by computing the delta with the offset for the next face without
+ * using a separate data structure, e.g.:
+ *
+ * const int num_face_ptex_faces = face_ptex_offset[i + 1] - face_ptex_offset[i];
+ *
+ * In total this array has a size of `num base faces + 1`.
+ */
int *face_ptex_offset;
} cache_;
} Subdiv;
@@ -257,6 +266,9 @@ void BKE_subdiv_displacement_detach(Subdiv *subdiv);
/* ============================ TOPOLOGY HELPERS ============================ */
+/* For each element in the array, this stores the total number of ptex faces up to that element,
+ * with the total number of ptex faces being the last element in the array. The array is of length
+ * `base face count + 1`. */
int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv);
/* =========================== PTEX FACES AND GRIDS ========================= */
diff --git a/source/blender/blenkernel/BKE_subdiv_eval.h b/source/blender/blenkernel/BKE_subdiv_eval.h
index 0b61e62c89c..177d5f386a8 100644
--- a/source/blender/blenkernel/BKE_subdiv_eval.h
+++ b/source/blender/blenkernel/BKE_subdiv_eval.h
@@ -31,15 +31,25 @@ extern "C" {
struct Mesh;
struct Subdiv;
+struct OpenSubdiv_EvaluatorCache;
+
+typedef enum eSubdivEvaluatorType {
+ SUBDIV_EVALUATOR_TYPE_CPU,
+ SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE,
+} eSubdivEvaluatorType;
/* Returns true if evaluator is ready for use. */
-bool BKE_subdiv_eval_begin(struct Subdiv *subdiv);
+bool BKE_subdiv_eval_begin(struct Subdiv *subdiv,
+ eSubdivEvaluatorType evaluator_type,
+ struct OpenSubdiv_EvaluatorCache *evaluator_cache);
/* coarse_vertex_cos is an optional argument which allows to override coordinates of the coarse
* mesh. */
bool BKE_subdiv_eval_begin_from_mesh(struct Subdiv *subdiv,
const struct Mesh *mesh,
- const float (*coarse_vertex_cos)[3]);
+ const float (*coarse_vertex_cos)[3],
+ eSubdivEvaluatorType evaluator_type,
+ struct OpenSubdiv_EvaluatorCache *evaluator_cache);
bool BKE_subdiv_eval_refine_from_mesh(struct Subdiv *subdiv,
const struct Mesh *mesh,
const float (*coarse_vertex_cos)[3]);
diff --git a/source/blender/blenkernel/BKE_subdiv_foreach.h b/source/blender/blenkernel/BKE_subdiv_foreach.h
index 3f74299455d..f63e23917ef 100644
--- a/source/blender/blenkernel/BKE_subdiv_foreach.h
+++ b/source/blender/blenkernel/BKE_subdiv_foreach.h
@@ -38,7 +38,8 @@ typedef bool (*SubdivForeachTopologyInformationCb)(const struct SubdivForeachCon
const int num_vertices,
const int num_edges,
const int num_loops,
- const int num_polygons);
+ const int num_polygons,
+ const int *subdiv_polygon_offset);
typedef void (*SubdivForeachVertexFromCornerCb)(const struct SubdivForeachContext *context,
void *tls,
diff --git a/source/blender/blenkernel/BKE_subdiv_modifier.h b/source/blender/blenkernel/BKE_subdiv_modifier.h
new file mode 100644
index 00000000000..94068613101
--- /dev/null
+++ b/source/blender/blenkernel/BKE_subdiv_modifier.h
@@ -0,0 +1,71 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2021 by Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bke
+ */
+
+#pragma once
+
+#include "BLI_sys_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Mesh;
+struct Object;
+struct Scene;
+struct Subdiv;
+struct SubdivSettings;
+struct SubsurfModifierData;
+
+void BKE_subsurf_modifier_subdiv_settings_init(struct SubdivSettings *settings,
+ const struct SubsurfModifierData *smd,
+ const bool use_render_params);
+
+/* If skip_check_is_last is true, we assume that the modifier passed is the last enabled modifier
+ * in the stack. */
+bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const struct Scene *scene,
+ const struct Object *ob,
+ const struct SubsurfModifierData *smd,
+ int required_mode,
+ bool skip_check_is_last);
+
+bool BKE_subsurf_modifier_can_do_gpu_subdiv(const struct Scene *scene,
+ const struct Object *ob,
+ const int required_mode);
+
+extern void (*BKE_subsurf_modifier_free_gpu_cache_cb)(struct Subdiv *subdiv);
+
+struct Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure(
+ const struct SubsurfModifierData *smd,
+ const struct SubdivSettings *subdiv_settings,
+ const struct Mesh *mesh,
+ const bool for_draw_code);
+
+struct SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(struct SubsurfModifierData *smd);
+
+/* Return the #ModifierMode required for the evaluation of the subsurf modifier, which should be
+ * used to check if the modifier is enabled. */
+int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt
index fe33abd17c0..3c780a933d3 100644
--- a/source/blender/blenkernel/CMakeLists.txt
+++ b/source/blender/blenkernel/CMakeLists.txt
@@ -275,6 +275,7 @@ set(SRC
intern/subdiv_eval.c
intern/subdiv_foreach.c
intern/subdiv_mesh.c
+ intern/subdiv_modifier.c
intern/subdiv_stats.c
intern/subdiv_topology.c
intern/subsurf_ccg.c
@@ -453,6 +454,7 @@ set(SRC
BKE_subdiv_eval.h
BKE_subdiv_foreach.h
BKE_subdiv_mesh.h
+ BKE_subdiv_modifier.h
BKE_subdiv_topology.h
BKE_subsurf.h
BKE_text.h
diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc
index da5b4ccc764..47ea55be871 100644
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@@ -319,6 +319,7 @@ void BKE_mesh_ensure_normals(Mesh *mesh)
void BKE_mesh_ensure_normals_for_display(Mesh *mesh)
{
switch ((eMeshWrapperType)mesh->runtime.wrapper_type) {
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
/* Run code below. */
break;
diff --git a/source/blender/blenkernel/intern/mesh_wrapper.c b/source/blender/blenkernel/intern/mesh_wrapper.c
index bc1ffeb8cf4..5956f2802b5 100644
--- a/source/blender/blenkernel/intern/mesh_wrapper.c
+++ b/source/blender/blenkernel/intern/mesh_wrapper.c
@@ -36,6 +36,7 @@
#include "DNA_mesh_types.h"
#include "DNA_meshdata_types.h"
+#include "DNA_modifier_types.h"
#include "DNA_object_types.h"
#include "BLI_ghash.h"
@@ -50,8 +51,14 @@
#include "BKE_mesh.h"
#include "BKE_mesh_runtime.h"
#include "BKE_mesh_wrapper.h"
+#include "BKE_modifier.h"
+#include "BKE_object.h"
+#include "BKE_subdiv.h"
+#include "BKE_subdiv_mesh.h"
+#include "BKE_subdiv_modifier.h"
#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_query.h"
Mesh *BKE_mesh_wrapper_from_editmesh_with_coords(BMEditMesh *em,
const CustomData_MeshMasks *cd_mask_extra,
@@ -106,7 +113,8 @@ static void mesh_wrapper_ensure_mdata_isolated(void *userdata)
me->runtime.wrapper_type = ME_WRAPPER_TYPE_MDATA;
switch (geom_type_orig) {
- case ME_WRAPPER_TYPE_MDATA: {
+ case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD: {
break; /* Quiet warning. */
}
case ME_WRAPPER_TYPE_BMESH: {
@@ -157,6 +165,7 @@ bool BKE_mesh_wrapper_minmax(const Mesh *me, float min[3], float max[3])
case ME_WRAPPER_TYPE_BMESH:
return BKE_editmesh_cache_calc_minmax(me->edit_mesh, me->runtime.edit_data, min, max);
case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD:
return BKE_mesh_minmax(me, min, max);
}
BLI_assert_unreachable();
@@ -191,7 +200,8 @@ void BKE_mesh_wrapper_vert_coords_copy(const Mesh *me,
}
return;
}
- case ME_WRAPPER_TYPE_MDATA: {
+ case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD: {
BLI_assert(vert_coords_len <= me->totvert);
const MVert *mvert = me->mvert;
for (int i = 0; i < vert_coords_len; i++) {
@@ -228,7 +238,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const Mesh *me,
}
return;
}
- case ME_WRAPPER_TYPE_MDATA: {
+ case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD: {
BLI_assert(vert_coords_len == me->totvert);
const MVert *mvert = me->mvert;
for (int i = 0; i < vert_coords_len; i++) {
@@ -252,6 +263,7 @@ int BKE_mesh_wrapper_vert_len(const Mesh *me)
case ME_WRAPPER_TYPE_BMESH:
return me->edit_mesh->bm->totvert;
case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD:
return me->totvert;
}
BLI_assert_unreachable();
@@ -264,6 +276,7 @@ int BKE_mesh_wrapper_edge_len(const Mesh *me)
case ME_WRAPPER_TYPE_BMESH:
return me->edit_mesh->bm->totedge;
case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD:
return me->totedge;
}
BLI_assert_unreachable();
@@ -276,6 +289,7 @@ int BKE_mesh_wrapper_loop_len(const Mesh *me)
case ME_WRAPPER_TYPE_BMESH:
return me->edit_mesh->bm->totloop;
case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD:
return me->totloop;
}
BLI_assert_unreachable();
@@ -288,6 +302,7 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me)
case ME_WRAPPER_TYPE_BMESH:
return me->edit_mesh->bm->totface;
case ME_WRAPPER_TYPE_MDATA:
+ case ME_WRAPPER_TYPE_SUBD:
return me->totpoly;
}
BLI_assert_unreachable();
@@ -295,3 +310,67 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me)
}
/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name CPU Subdivision Evaluation
+ * \{ */
+
+Mesh *BKE_mesh_wrapper_ensure_subdivision(const Object *ob, Mesh *me)
+{
+ ThreadMutex *mesh_eval_mutex = (ThreadMutex *)me->runtime.eval_mutex;
+ BLI_mutex_lock(mesh_eval_mutex);
+
+ if (me->runtime.wrapper_type == ME_WRAPPER_TYPE_SUBD) {
+ BLI_mutex_unlock(mesh_eval_mutex);
+ return me->runtime.mesh_eval;
+ }
+
+ SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob);
+ if (!smd) {
+ BLI_mutex_unlock(mesh_eval_mutex);
+ return me;
+ }
+
+ const bool apply_render = me->runtime.subsurf_apply_render;
+
+ SubdivSettings subdiv_settings;
+ BKE_subsurf_modifier_subdiv_settings_init(&subdiv_settings, smd, apply_render);
+ if (subdiv_settings.level == 0) {
+ return me;
+ }
+
+ SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd);
+
+ Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &subdiv_settings, me, false);
+ if (subdiv == NULL) {
+ /* Happens on bad topology, but also on empty input mesh. */
+ return me;
+ }
+
+ SubdivToMeshSettings mesh_settings;
+ mesh_settings.resolution = me->runtime.subsurf_resolution;
+ mesh_settings.use_optimal_display = me->runtime.subsurf_use_optimal_display;
+
+ if (mesh_settings.resolution < 3) {
+ return me;
+ }
+
+ Mesh *subdiv_mesh = BKE_subdiv_to_mesh(subdiv, &mesh_settings, me);
+
+ if (subdiv != runtime_data->subdiv) {
+ BKE_subdiv_free(subdiv);
+ }
+
+ if (subdiv_mesh != me) {
+ if (me->runtime.mesh_eval != NULL) {
+ BKE_id_free(NULL, me->runtime.mesh_eval);
+ }
+ me->runtime.mesh_eval = subdiv_mesh;
+ me->runtime.wrapper_type = ME_WRAPPER_TYPE_SUBD;
+ }
+
+ BLI_mutex_unlock(mesh_eval_mutex);
+ return me->runtime.mesh_eval;
+}
+
+/** \} */
diff --git a/source/blender/blenkernel/intern/modifier.c b/source/blender/blenkernel/intern/modifier.c
index e1d201d7806..f3b6c2544bf 100644
--- a/source/blender/blenkernel/intern/modifier.c
+++ b/source/blender/blenkernel/intern/modifier.c
@@ -970,6 +970,7 @@ static void modwrap_dependsOnNormals(Mesh *me)
}
break;
}
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
BKE_mesh_calc_normals(me);
break;
diff --git a/source/blender/blenkernel/intern/multires_reshape_smooth.c b/source/blender/blenkernel/intern/multires_reshape_smooth.c
index 3665d01926b..50b4410a28e 100644
--- a/source/blender/blenkernel/intern/multires_reshape_smooth.c
+++ b/source/blender/blenkernel/intern/multires_reshape_smooth.c
@@ -566,7 +566,8 @@ static bool foreach_topology_info(const SubdivForeachContext *foreach_context,
const int num_vertices,
const int num_edges,
const int num_loops,
- const int num_polygons)
+ const int num_polygons,
+ const int *UNUSED(subdiv_polygon_offset))
{
MultiresReshapeSmoothContext *reshape_smooth_context = foreach_context->user_data;
const int max_edges = reshape_smooth_context->smoothing_type == MULTIRES_SUBDIVIDE_LINEAR ?
@@ -1037,7 +1038,7 @@ static void reshape_subdiv_create(MultiresReshapeSmoothContext *reshape_smooth_c
converter_init(reshape_smooth_context, &converter);
Subdiv *reshape_subdiv = BKE_subdiv_new_from_converter(settings, &converter);
- BKE_subdiv_eval_begin(reshape_subdiv);
+ BKE_subdiv_eval_begin(reshape_subdiv, SUBDIV_EVALUATOR_TYPE_CPU, NULL);
reshape_smooth_context->reshape_subdiv = reshape_subdiv;
diff --git a/source/blender/blenkernel/intern/multires_reshape_util.c b/source/blender/blenkernel/intern/multires_reshape_util.c
index b7572204182..07a5d7c4a61 100644
--- a/source/blender/blenkernel/intern/multires_reshape_util.c
+++ b/source/blender/blenkernel/intern/multires_reshape_util.c
@@ -65,7 +65,7 @@ Subdiv *multires_reshape_create_subdiv(Depsgraph *depsgraph,
SubdivSettings subdiv_settings;
BKE_multires_subdiv_settings_init(&subdiv_settings, mmd);
Subdiv *subdiv = BKE_subdiv_new_from_mesh(&subdiv_settings, base_mesh);
- if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) {
+ if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) {
BKE_subdiv_free(subdiv);
return NULL;
}
diff --git a/source/blender/blenkernel/intern/multires_reshape_vertcos.c b/source/blender/blenkernel/intern/multires_reshape_vertcos.c
index ed2df1ba8c5..c009349ff1b 100644
--- a/source/blender/blenkernel/intern/multires_reshape_vertcos.c
+++ b/source/blender/blenkernel/intern/multires_reshape_vertcos.c
@@ -114,7 +114,8 @@ static bool multires_reshape_vertcos_foreach_topology_info(
const int num_vertices,
const int UNUSED(num_edges),
const int UNUSED(num_loops),
- const int UNUSED(num_polygons))
+ const int UNUSED(num_polygons),
+ const int *UNUSED(subdiv_polygon_offset))
{
MultiresReshapeAssignVertcosContext *reshape_vertcos_context = foreach_context->user_data;
if (num_vertices != reshape_vertcos_context->num_vert_coords) {
diff --git a/source/blender/blenkernel/intern/multires_versioning.c b/source/blender/blenkernel/intern/multires_versioning.c
index 4c0d7165cd0..18708c43f26 100644
--- a/source/blender/blenkernel/intern/multires_versioning.c
+++ b/source/blender/blenkernel/intern/multires_versioning.c
@@ -61,7 +61,7 @@ static Subdiv *subdiv_for_simple_to_catmull_clark(Object *object, MultiresModifi
Subdiv *subdiv = BKE_subdiv_new_from_converter(&subdiv_settings, &converter);
BKE_subdiv_converter_free(&converter);
- if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) {
+ if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) {
BKE_subdiv_free(subdiv);
return NULL;
}
diff --git a/source/blender/blenkernel/intern/object.cc b/source/blender/blenkernel/intern/object.cc
index 6cc6219b7d7..d08ea74d2c6 100644
--- a/source/blender/blenkernel/intern/object.cc
+++ b/source/blender/blenkernel/intern/object.cc
@@ -1773,8 +1773,9 @@ static void object_update_from_subsurf_ccg(Object *object)
if (!object->runtime.is_data_eval_owned) {
return;
}
- /* Object was never evaluated, so can not have CCG subdivision surface. */
- Mesh *mesh_eval = BKE_object_get_evaluated_mesh(object);
+ /* Object was never evaluated, so can not have CCG subdivision surface. If it were evaluated, do
+ * not try to compute OpenSubDiv on the CPU as it is not needed here. */
+ Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(object);
if (mesh_eval == nullptr) {
return;
}
@@ -4496,7 +4497,7 @@ bool BKE_object_obdata_texspace_get(Object *ob, char **r_texflag, float **r_loc,
return true;
}
-Mesh *BKE_object_get_evaluated_mesh(const Object *object)
+Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const Object *object)
{
/* First attempt to retrieve the evaluated mesh from the evaluated geometry set. Most
* object types either store it there or add a reference to it if it's owned elsewhere. */
@@ -4523,6 +4524,20 @@ Mesh *BKE_object_get_evaluated_mesh(const Object *object)
return nullptr;
}
+Mesh *BKE_object_get_evaluated_mesh(const Object *object)
+{
+ Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(object);
+ if (!mesh) {
+ return nullptr;
+ }
+
+ if (object->data && GS(((const ID *)object->data)->name) == ID_ME) {
+ mesh = BKE_mesh_wrapper_ensure_subdivision(object, mesh);
+ }
+
+ return mesh;
+}
+
Mesh *BKE_object_get_pre_modified_mesh(const Object *object)
{
if (object->type == OB_MESH && object->runtime.data_orig != nullptr) {
@@ -5779,6 +5794,21 @@ void BKE_object_modifiers_lib_link_common(void *userData,
}
}
+SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const Object *ob)
+{
+ ModifierData *md = (ModifierData *)(ob->modifiers.last);
+
+ while (md) {
+ if (md->type == eModifierType_Subsurf) {
+ break;
+ }
+
+ md = md->prev;
+ }
+
+ return (SubsurfModifierData *)(md);
+}
+
void BKE_object_replace_data_on_shallow_copy(Object *ob, ID *new_data)
{
ob->type = BKE_object_obdata_to_type(new_data);
diff --git a/source/blender/blenkernel/intern/subdiv.c b/source/blender/blenkernel/intern/subdiv.c
index fd32f52351a..45810e29565 100644
--- a/source/blender/blenkernel/intern/subdiv.c
+++ b/source/blender/blenkernel/intern/subdiv.c
@@ -29,6 +29,9 @@
#include "BLI_utildefines.h"
+#include "BKE_modifier.h"
+#include "BKE_subdiv_modifier.h"
+
#include "MEM_guardedalloc.h"
#include "subdiv_converter.h"
@@ -189,6 +192,12 @@ Subdiv *BKE_subdiv_update_from_mesh(Subdiv *subdiv,
void BKE_subdiv_free(Subdiv *subdiv)
{
if (subdiv->evaluator != NULL) {
+ const eOpenSubdivEvaluator evaluator_type = subdiv->evaluator->type;
+ if (evaluator_type != OPENSUBDIV_EVALUATOR_CPU) {
+ /* Let the draw code do the freeing, to ensure that the OpenGL context is valid. */
+ BKE_subsurf_modifier_free_gpu_cache_cb(subdiv);
+ return;
+ }
openSubdiv_deleteEvaluator(subdiv->evaluator);
}
if (subdiv->topology_refiner != NULL) {
@@ -214,12 +223,13 @@ int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv)
}
const int num_coarse_faces = topology_refiner->getNumFaces(topology_refiner);
subdiv->cache_.face_ptex_offset = MEM_malloc_arrayN(
- num_coarse_faces, sizeof(int), "subdiv face_ptex_offset");
+ num_coarse_faces + 1, sizeof(int), "subdiv face_ptex_offset");
int ptex_offset = 0;
for (int face_index = 0; face_index < num_coarse_faces; face_index++) {
const int num_ptex_faces = topology_refiner->getNumFacePtexFaces(topology_refiner, face_index);
subdiv->cache_.face_ptex_offset[face_index] = ptex_offset;
ptex_offset += num_ptex_faces;
}
+ subdiv->cache_.face_ptex_offset[num_coarse_faces] = ptex_offset;
return subdiv->cache_.face_ptex_offset;
}
diff --git a/source/blender/blenkernel/intern/subdiv_ccg.c b/source/blender/blenkernel/intern/subdiv_ccg.c
index 77962ec924c..7d876acf776 100644
--- a/source/blender/blenkernel/intern/subdiv_ccg.c
+++ b/source/blender/blenkernel/intern/subdiv_ccg.c
@@ -603,7 +603,8 @@ Mesh *BKE_subdiv_to_ccg_mesh(Subdiv *subdiv,
{
/* Make sure evaluator is ready. */
BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_CCG);
- if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) {
+ if (!BKE_subdiv_eval_begin_from_mesh(
+ subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) {
if (coarse_mesh->totpoly) {
return NULL;
}
diff --git a/source/blender/blenkernel/intern/subdiv_deform.c b/source/blender/blenkernel/intern/subdiv_deform.c
index 7a2d639e4e5..c385b1b291d 100644
--- a/source/blender/blenkernel/intern/subdiv_deform.c
+++ b/source/blender/blenkernel/intern/subdiv_deform.c
@@ -117,7 +117,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex
const int UNUSED(num_vertices),
const int UNUSED(num_edges),
const int UNUSED(num_loops),
- const int UNUSED(num_polygons))
+ const int UNUSED(num_polygons),
+ const int *UNUSED(subdiv_polygon_offset))
{
SubdivDeformContext *subdiv_context = foreach_context->user_data;
subdiv_mesh_prepare_accumulator(subdiv_context, subdiv_context->coarse_mesh->totvert);
@@ -202,7 +203,8 @@ void BKE_subdiv_deform_coarse_vertices(struct Subdiv *subdiv,
BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH);
/* Make sure evaluator is up to date with possible new topology, and that
* is refined for the new positions of coarse vertices. */
- if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, vertex_cos)) {
+ if (!BKE_subdiv_eval_begin_from_mesh(
+ subdiv, coarse_mesh, vertex_cos, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) {
/* This could happen in two situations:
* - OpenSubdiv is disabled.
* - Something totally bad happened, and OpenSubdiv rejected our
diff --git a/source/blender/blenkernel/intern/subdiv_eval.c b/source/blender/blenkernel/intern/subdiv_eval.c
index 0001eb8a205..9733a1498a6 100644
--- a/source/blender/blenkernel/intern/subdiv_eval.c
+++ b/source/blender/blenkernel/intern/subdiv_eval.c
@@ -28,6 +28,7 @@
#include "BLI_bitmap.h"
#include "BLI_math_vector.h"
+#include "BLI_task.h"
#include "BLI_utildefines.h"
#include "BKE_customdata.h"
@@ -38,7 +39,28 @@
#include "opensubdiv_evaluator_capi.h"
#include "opensubdiv_topology_refiner_capi.h"
-bool BKE_subdiv_eval_begin(Subdiv *subdiv)
+/* ============================ Helper Function ============================ */
+
+static eOpenSubdivEvaluator opensubdiv_evalutor_from_subdiv_evaluator_type(
+ eSubdivEvaluatorType evaluator_type)
+{
+ switch (evaluator_type) {
+ case SUBDIV_EVALUATOR_TYPE_CPU: {
+ return OPENSUBDIV_EVALUATOR_CPU;
+ }
+ case SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE: {
+ return OPENSUBDIV_EVALUATOR_GLSL_COMPUTE;
+ }
+ }
+ BLI_assert_msg(0, "Unknown evaluator type");
+ return OPENSUBDIV_EVALUATOR_CPU;
+}
+
+/* ====================== Main Subdivision Evaluation ====================== */
+
+bool BKE_subdiv_eval_begin(Subdiv *subdiv,
+ eSubdivEvaluatorType evaluator_type,
+ OpenSubdiv_EvaluatorCache *evaluator_cache)
{
BKE_subdiv_stats_reset(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE);
if (subdiv->topology_refiner == NULL) {
@@ -47,8 +69,11 @@ bool BKE_subdiv_eval_begin(Subdiv *subdiv)
return false;
}
if (subdiv->evaluator == NULL) {
+ eOpenSubdivEvaluator opensubdiv_evaluator_type =
+ opensubdiv_evalutor_from_subdiv_evaluator_type(evaluator_type);
BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE);
- subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner(subdiv->topology_refiner);
+ subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner(
+ subdiv->topology_refiner, opensubdiv_evaluator_type, evaluator_cache);
BKE_subdiv_stats_end(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE);
if (subdiv->evaluator == NULL) {
return false;
@@ -80,6 +105,9 @@ static void set_coarse_positions(Subdiv *subdiv,
BLI_BITMAP_ENABLE(vertex_used_map, loop->v);
}
}
+ /* Use a temporary buffer so we do not upload vertices one at a time to the GPU. */
+ float(*buffer)[3] = MEM_mallocN(sizeof(float[3]) * mesh->totvert, "subdiv tmp coarse positions");
+ int manifold_vertex_count = 0;
for (int vertex_index = 0, manifold_vertex_index = 0; vertex_index < mesh->totvert;
vertex_index++) {
if (!BLI_BITMAP_TEST_BOOL(vertex_used_map, vertex_index)) {
@@ -93,13 +121,49 @@ static void set_coarse_positions(Subdiv *subdiv,
const MVert *vertex = &mvert[vertex_index];
vertex_co = vertex->co;
}
- subdiv->evaluator->setCoarsePositions(subdiv->evaluator, vertex_co, manifold_vertex_index, 1);
+ copy_v3_v3(&buffer[manifold_vertex_index][0], vertex_co);
manifold_vertex_index++;
+ manifold_vertex_count++;
}
+ subdiv->evaluator->setCoarsePositions(
+ subdiv->evaluator, &buffer[0][0], 0, manifold_vertex_count);
MEM_freeN(vertex_used_map);
+ MEM_freeN(buffer);
+}
+
+/* Context which is used to fill face varying data in parallel. */
+typedef struct FaceVaryingDataFromUVContext {
+ OpenSubdiv_TopologyRefiner *topology_refiner;
+ const Mesh *mesh;
+ const MLoopUV *mloopuv;
+ float (*buffer)[2];
+ int layer_index;
+} FaceVaryingDataFromUVContext;
+
+static void set_face_varying_data_from_uv_task(void *__restrict userdata,
+ const int face_index,
+ const TaskParallelTLS *__restrict UNUSED(tls))
+{
+ FaceVaryingDataFromUVContext *ctx = userdata;
+ OpenSubdiv_TopologyRefiner *topology_refiner = ctx->topology_refiner;
+ const int layer_index = ctx->layer_index;
+ const Mesh *mesh = ctx->mesh;
+ const MPoly *mpoly = &mesh->mpoly[face_index];
+ const MLoopUV *mluv = &ctx->mloopuv[mpoly->loopstart];
+
+ /* TODO(sergey): OpenSubdiv's C-API converter can change winding of
+ * loops of a face, need to watch for that, to prevent wrong UVs assigned.
+ */
+ const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner, face_index);
+ const int *uv_indices = topology_refiner->getFaceFVarValueIndices(
+ topology_refiner, face_index, layer_index);
+ for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) {
+ copy_v2_v2(ctx->buffer[uv_indices[vertex_index]], mluv->uv);
+ }
}
static void set_face_varying_data_from_uv(Subdiv *subdiv,
+ const Mesh *mesh,
const MLoopUV *mloopuv,
const int layer_index)
{
@@ -107,25 +171,37 @@ static void set_face_varying_data_from_uv(Subdiv *subdiv,
OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
const int num_faces = topology_refiner->getNumFaces(topology_refiner);
const MLoopUV *mluv = mloopuv;
- /* TODO(sergey): OpenSubdiv's C-API converter can change winding of
- * loops of a face, need to watch for that, to prevent wrong UVs assigned.
- */
- for (int face_index = 0; face_index < num_faces; face_index++) {
- const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner,
- face_index);
- const int *uv_indices = topology_refiner->getFaceFVarValueIndices(
- topology_refiner, face_index, layer_index);
- for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) {
- evaluator->setFaceVaryingData(evaluator, layer_index, mluv->uv, uv_indices[vertex_index], 1);
- }
- }
+
+ const int num_fvar_values = topology_refiner->getNumFVarValues(topology_refiner, layer_index);
+ /* Use a temporary buffer so we do not upload UVs one at a time to the GPU. */
+ float(*buffer)[2] = MEM_mallocN(sizeof(float[2]) * num_fvar_values, "temp UV storage");
+
+ FaceVaryingDataFromUVContext ctx;
+ ctx.topology_refiner = topology_refiner;
+ ctx.layer_index = layer_index;
+ ctx.mloopuv = mluv;
+ ctx.mesh = mesh;
+ ctx.buffer = buffer;
+
+ TaskParallelSettings parallel_range_settings;
+ BLI_parallel_range_settings_defaults(&parallel_range_settings);
+ parallel_range_settings.min_iter_per_thread = 1;
+
+ BLI_task_parallel_range(
+ 0, num_faces, &ctx, set_face_varying_data_from_uv_task, &parallel_range_settings);
+
+ evaluator->setFaceVaryingData(evaluator, layer_index, &buffer[0][0], 0, num_fvar_values);
+
+ MEM_freeN(buffer);
}
bool BKE_subdiv_eval_begin_from_mesh(Subdiv *subdiv,
const Mesh *mesh,
- const float (*coarse_vertex_cos)[3])
+ const float (*coarse_vertex_cos)[3],
+ eSubdivEvaluatorType evaluator_type,
+ OpenSubdiv_EvaluatorCache *evaluator_cache)
{
- if (!BKE_subdiv_eval_begin(subdiv)) {
+ if (!BKE_subdiv_eval_begin(subdiv, evaluator_type, evaluator_cache)) {
return false;
}
return BKE_subdiv_eval_refine_from_mesh(subdiv, mesh, coarse_vertex_cos);
@@ -146,7 +222,7 @@ bool BKE_subdiv_eval_refine_from_mesh(Subdiv *subdiv,
const int num_uv_layers = CustomData_number_of_layers(&mesh->ldata, CD_MLOOPUV);
for (int layer_index = 0; layer_index < num_uv_layers; layer_index++) {
const MLoopUV *mloopuv = CustomData_get_layer_n(&mesh->ldata, CD_MLOOPUV, layer_index);
- set_face_varying_data_from_uv(subdiv, mloopuv, layer_index);
+ set_face_varying_data_from_uv(subdiv, mesh, mloopuv, layer_index);
}
/* Update evaluator to the new coarse geometry. */
BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_REFINE);
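The hunks above in `subdiv_eval.c` replace per-vertex/per-UV evaluator calls with a single bulk upload via a staging buffer, since each call may now cross into GPU territory. The general pattern can be sketched in isolation as follows; the `UploadFn` callback and `record_upload` recorder are hypothetical stand-ins for the evaluator's `setCoarsePositions()`/`setFaceVaryingData()` functions, not actual Blender API:

```c
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical upload callback standing in for the evaluator's
 * setCoarsePositions()/setFaceVaryingData() functions. */
typedef void (*UploadFn)(const float *data, int start, int count, void *user);

/* Copy all positions into one contiguous staging buffer, then upload once,
 * instead of invoking the (potentially GPU-backed) callback per vertex. */
static void upload_positions_batched(const float (*positions)[3],
                                     int count,
                                     UploadFn upload,
                                     void *user)
{
  float(*staging)[3] = malloc(sizeof(float[3]) * (size_t)count);
  for (int i = 0; i < count; i++) {
    memcpy(staging[i], positions[i], sizeof(float[3]));
  }
  upload(&staging[0][0], 0, count, user); /* One call covers all vertices. */
  free(staging);
}

/* Simple recorder used to demonstrate that only one upload happens. */
static int g_upload_calls = 0;
static int g_uploaded_count = 0;
static void record_upload(const float *data, int start, int count, void *user)
{
  (void)data;
  (void)start;
  (void)user;
  g_upload_calls++;
  g_uploaded_count = count;
}
```

With a GPU evaluator, each `setCoarsePositions` call can imply a buffer transfer, so collapsing N per-vertex calls into one bulk call avoids N round-trips; on the CPU path the staging copy is cheap, so the same code path serves both.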
diff --git a/source/blender/blenkernel/intern/subdiv_foreach.c b/source/blender/blenkernel/intern/subdiv_foreach.c
index 061c196df2a..69bead27fe6 100644
--- a/source/blender/blenkernel/intern/subdiv_foreach.c
+++ b/source/blender/blenkernel/intern/subdiv_foreach.c
@@ -1877,7 +1877,8 @@ bool BKE_subdiv_foreach_subdiv_geometry(Subdiv *subdiv,
ctx.num_subdiv_vertices,
ctx.num_subdiv_edges,
ctx.num_subdiv_loops,
- ctx.num_subdiv_polygons)) {
+ ctx.num_subdiv_polygons,
+ ctx.subdiv_polygon_offset)) {
subdiv_foreach_ctx_free(&ctx);
return false;
}
diff --git a/source/blender/blenkernel/intern/subdiv_mesh.c b/source/blender/blenkernel/intern/subdiv_mesh.c
index e5c7d13edab..1f31d0543ad 100644
--- a/source/blender/blenkernel/intern/subdiv_mesh.c
+++ b/source/blender/blenkernel/intern/subdiv_mesh.c
@@ -514,7 +514,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex
const int num_vertices,
const int num_edges,
const int num_loops,
- const int num_polygons)
+ const int num_polygons,
+ const int *UNUSED(subdiv_polygon_offset))
{
 /* Multires grid data will be applied or become invalid after subdivision,
  * so don't try to preserve it, which would only waste memory. */
@@ -1193,7 +1194,8 @@ Mesh *BKE_subdiv_to_mesh(Subdiv *subdiv,
BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH);
/* Make sure evaluator is up to date with possible new topology, and that
* it is refined for the new positions of coarse vertices. */
- if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) {
+ if (!BKE_subdiv_eval_begin_from_mesh(
+ subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) {
/* This could happen in two situations:
* - OpenSubdiv is disabled.
* - Something totally bad happened, and OpenSubdiv rejected our
diff --git a/source/blender/blenkernel/intern/subdiv_modifier.c b/source/blender/blenkernel/intern/subdiv_modifier.c
new file mode 100644
index 00000000000..bafcb631f59
--- /dev/null
+++ b/source/blender/blenkernel/intern/subdiv_modifier.c
@@ -0,0 +1,162 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2021 by Blender Foundation.
+ * All rights reserved.
+ */
+
+#include "BKE_subdiv_modifier.h"
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_mesh_types.h"
+#include "DNA_modifier_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+#include "DNA_userdef_types.h"
+
+#include "BKE_modifier.h"
+#include "BKE_subdiv.h"
+
+#include "GPU_capabilities.h"
+#include "GPU_context.h"
+
+#include "opensubdiv_capi.h"
+
+void BKE_subsurf_modifier_subdiv_settings_init(SubdivSettings *settings,
+ const SubsurfModifierData *smd,
+ const bool use_render_params)
+{
+ const int requested_levels = (use_render_params) ? smd->renderLevels : smd->levels;
+
+ settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE);
+ settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision);
+ settings->level = settings->is_simple ?
+ 1 :
+ (settings->is_adaptive ? smd->quality : requested_levels);
+ settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease);
+ settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf(
+ smd->boundary_smooth);
+ settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth(
+ smd->uv_smooth);
+}
+
+static ModifierData *modifier_get_last_enabled_for_mode(const Scene *scene,
+ const Object *ob,
+ int required_mode)
+{
+ ModifierData *md = ob->modifiers.last;
+
+ while (md) {
+ if (BKE_modifier_is_enabled(scene, md, required_mode)) {
+ break;
+ }
+
+ md = md->prev;
+ }
+
+ return md;
+}
+
+bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const Scene *scene,
+ const Object *ob,
+ const SubsurfModifierData *smd,
+ int required_mode,
+ bool skip_check_is_last)
+{
+ if ((U.gpu_flag & USER_GPU_FLAG_SUBDIVISION_EVALUATION) == 0) {
+ return false;
+ }
+
+ if (!skip_check_is_last) {
+ ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode);
+ if (md != (const ModifierData *)smd) {
+ return false;
+ }
+ }
+
+ /* Only OpenGL is supported for OpenSubdiv evaluation for now. */
+ if (GPU_backend_get_type() != GPU_BACKEND_OPENGL) {
+ return false;
+ }
+
+ if (!GPU_compute_shader_support()) {
+ return false;
+ }
+
+ const int available_evaluators = openSubdiv_getAvailableEvaluators();
+ if ((available_evaluators & OPENSUBDIV_EVALUATOR_GLSL_COMPUTE) == 0) {
+ return false;
+ }
+
+ return true;
+}
+
+bool BKE_subsurf_modifier_can_do_gpu_subdiv(const Scene *scene,
+ const Object *ob,
+ int required_mode)
+{
+ ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode);
+
+ if (!md) {
+ return false;
+ }
+
+ if (md->type != eModifierType_Subsurf) {
+ return false;
+ }
+
+ return BKE_subsurf_modifier_can_do_gpu_subdiv_ex(
+ scene, ob, (SubsurfModifierData *)md, required_mode, true);
+}
+
+void (*BKE_subsurf_modifier_free_gpu_cache_cb)(Subdiv *subdiv) = NULL;
+
+/* The main goal of this function is to give a usable subdivision surface
+ * descriptor which matches the settings and topology. */
+Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure(const SubsurfModifierData *smd,
+ const SubdivSettings *subdiv_settings,
+ const Mesh *mesh,
+ const bool for_draw_code)
+{
+ SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime;
+ if (runtime_data->subdiv && runtime_data->set_by_draw_code != for_draw_code) {
+ BKE_subdiv_free(runtime_data->subdiv);
+ runtime_data->subdiv = NULL;
+ }
+ Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh);
+ runtime_data->subdiv = subdiv;
+ runtime_data->set_by_draw_code = for_draw_code;
+ return subdiv;
+}
+
+SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(SubsurfModifierData *smd)
+{
+ SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime;
+ if (runtime_data == NULL) {
+ runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime");
+ smd->modifier.runtime = runtime_data;
+ }
+ return runtime_data;
+}
+
+int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode)
+{
+ if (is_final_render) {
+ return eModifierMode_Render;
+ }
+
+ return eModifierMode_Realtime | (is_edit_mode ? eModifierMode_Editmode : 0);
+}
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 821b6025fff..eea3adc440a 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -44,9 +44,11 @@ set(INC
../../../intern/atomic
../../../intern/glew-mx
../../../intern/guardedalloc
+ ../../../intern/opensubdiv
# dna_type_offsets.h
${CMAKE_CURRENT_BINARY_DIR}/../makesdna/intern
+ ${OPENSUBDIV_INCLUDE_DIRS}
)
set(SRC
@@ -91,6 +93,7 @@ set(SRC
intern/draw_cache_impl_metaball.c
intern/draw_cache_impl_particles.c
intern/draw_cache_impl_pointcloud.c
+ intern/draw_cache_impl_subdivision.cc
intern/draw_cache_impl_volume.c
intern/draw_color_management.cc
intern/draw_common.c
@@ -209,6 +212,7 @@ set(SRC
intern/draw_manager_testing.h
intern/draw_manager_text.h
intern/draw_shader.h
+ intern/draw_subdivision.h
intern/draw_texture_pool.h
intern/draw_view.h
intern/draw_view_data.h
@@ -372,6 +376,18 @@ data_to_c_simple(intern/shaders/common_view_lib.glsl SRC)
data_to_c_simple(intern/shaders/common_fxaa_lib.glsl SRC)
data_to_c_simple(intern/shaders/common_smaa_lib.glsl SRC)
data_to_c_simple(intern/shaders/common_fullscreen_vert.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_custom_data_interp_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_ibo_lines_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_ibo_tris_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_lib.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_normals_accumulate_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_normals_finalize_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_patch_evaluation_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_lnor_comp.glsl SRC)
+data_to_c_simple(intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl SRC)
data_to_c_simple(engines/gpencil/shaders/gpencil_frag.glsl SRC)
data_to_c_simple(engines/gpencil/shaders/gpencil_vert.glsl SRC)
diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h
index 98e166ac3a7..132f66ecb1e 100644
--- a/source/blender/draw/DRW_engine.h
+++ b/source/blender/draw/DRW_engine.h
@@ -191,6 +191,10 @@ void DRW_xr_drawing_end(void);
/* For garbage collection */
void DRW_cache_free_old_batches(struct Main *bmain);
+void DRW_cache_free_old_subdiv(void);
+
+/* For the OpenGL evaluators and garbage collected subdivision data. */
+void DRW_subdiv_free(void);
/* Never use this. Only for closing blender. */
void DRW_opengl_context_enable_ex(bool restore);
diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c
index 2345a110134..a754e81b949 100644
--- a/source/blender/draw/engines/overlay/overlay_armature.c
+++ b/source/blender/draw/engines/overlay/overlay_armature.c
@@ -589,7 +589,7 @@ static void drw_shgroup_bone_custom_wire(ArmatureDrawContext *ctx,
Object *custom)
{
/* See comments in #drw_shgroup_bone_custom_solid. */
- Mesh *mesh = BKE_object_get_evaluated_mesh(custom);
+ Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(custom);
if (mesh == NULL) {
return;
}
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 03fb3b92277..1110658e3b2 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -923,7 +923,7 @@ GPUBatch *DRW_cache_object_surface_get(Object *ob)
GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob)
{
- Mesh *me = BKE_object_get_evaluated_mesh(ob);
+ Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob);
short type = (me != NULL) ? OB_MESH : ob->type;
switch (type) {
@@ -950,7 +950,7 @@ int DRW_cache_object_material_count_get(struct Object *ob)
{
short type = ob->type;
- Mesh *me = BKE_object_get_evaluated_mesh(ob);
+ Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (me != NULL && type != OB_POINTCLOUD) {
/* Some object types can have one data type in ob->data, but will be rendered as mesh.
* For point clouds this never happens. Ideally this check would happen at another level
@@ -3021,7 +3021,7 @@ GPUBatch *DRW_cache_surf_surface_get(Object *ob)
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_surface(mesh_eval);
}
@@ -3034,7 +3034,7 @@ GPUBatch *DRW_cache_surf_edge_wire_get(Object *ob)
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_loose_edges(mesh_eval);
}
@@ -3047,7 +3047,7 @@ GPUBatch *DRW_cache_surf_face_wireframe_get(Object *ob)
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_wireframes_face(mesh_eval);
}
@@ -3059,7 +3059,7 @@ GPUBatch *DRW_cache_surf_edge_detection_get(Object *ob, bool *r_is_manifold)
{
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_edge_detection(mesh_eval, r_is_manifold);
}
@@ -3072,7 +3072,7 @@ GPUBatch *DRW_cache_surf_loose_edges_get(Object *ob)
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_loose_edges(mesh_eval);
}
@@ -3089,7 +3089,7 @@ GPUBatch **DRW_cache_surf_surface_shaded_get(Object *ob,
BLI_assert(ob->type == OB_SURF);
struct Curve *cu = ob->data;
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
if (mesh_eval != NULL) {
return DRW_mesh_batch_cache_get_surface_shaded(mesh_eval, gpumat_array, gpumat_array_len);
}
@@ -3382,7 +3382,7 @@ GPUBatch *DRW_cache_cursor_get(bool crosshair_lines)
void drw_batch_cache_validate(Object *ob)
{
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
switch (ob->type) {
case OB_MESH:
DRW_mesh_batch_cache_validate((Mesh *)ob->data);
@@ -3431,7 +3431,7 @@ void drw_batch_cache_generate_requested(Object *ob)
DRW_object_use_hide_faces(ob)) ||
((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob))));
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
switch (ob->type) {
case OB_MESH:
DRW_mesh_batch_cache_create_requested(
@@ -3470,7 +3470,7 @@ void drw_batch_cache_generate_requested_evaluated_mesh(Object *ob)
DRW_object_use_hide_faces(ob)) ||
((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob))));
- Mesh *mesh = BKE_object_get_evaluated_mesh(ob);
+ Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(ob);
DRW_mesh_batch_cache_create_requested(DST.task_graph, ob, mesh, scene, is_paint_mode, use_hide);
}
@@ -3481,7 +3481,7 @@ void drw_batch_cache_generate_requested_delayed(Object *ob)
void DRW_batch_cache_free_old(Object *ob, int ctime)
{
- struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
+ struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob);
switch (ob->type) {
case OB_MESH:
diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h
index ba42cdf66e7..6de9788b434 100644
--- a/source/blender/draw/intern/draw_cache_extract.h
+++ b/source/blender/draw/intern/draw_cache_extract.h
@@ -22,6 +22,7 @@
#pragma once
+struct DRWSubdivCache;
struct TaskGraph;
#include "DNA_customdata_types.h"
@@ -244,6 +245,13 @@ typedef enum DRWBatchFlag {
BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of bit fields");
+typedef struct MeshExtractLooseGeom {
+ int edge_len;
+ int vert_len;
+ int *verts;
+ int *edges;
+} MeshExtractLooseGeom;
+
/**
* Data that are kept around between extractions to reduce rebuilding time.
*
@@ -252,12 +260,7 @@ BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of b
typedef struct MeshBufferCache {
MeshBufferList buff;
- struct {
- int edge_len;
- int vert_len;
- int *verts;
- int *edges;
- } loose_geom;
+ MeshExtractLooseGeom loose_geom;
struct {
int *tri_first_index;
@@ -283,6 +286,8 @@ typedef struct MeshBatchCache {
GPUBatch **surface_per_mat;
+ struct DRWSubdivCache *subdiv_cache;
+
DRWBatchFlag batch_requested; /* DRWBatchFlag */
DRWBatchFlag batch_ready; /* DRWBatchFlag */
@@ -332,9 +337,14 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
const bool do_uvedit,
const bool use_subsurf_fdots,
const Scene *scene,
- const ToolSettings *ts,
+ const struct ToolSettings *ts,
const bool use_hide);
+void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+ MeshBufferCache *mbc,
+ struct DRWSubdivCache *subdiv_cache,
+ const struct ToolSettings *ts);
+
#ifdef __cplusplus
}
#endif
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc
index 485b803310c..383a3b05b67 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh.cc
+++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc
@@ -42,6 +42,7 @@
#include "draw_cache_extract.h"
#include "draw_cache_inline.h"
+#include "draw_subdivision.h"
#include "mesh_extractors/extract_mesh.h"
@@ -783,6 +784,99 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
/** \} */
+/* ---------------------------------------------------------------------- */
+/** \name Subdivision Extract Loop
+ * \{ */
+
+static void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+ MeshBufferCache *mbc,
+ DRWSubdivCache *subdiv_cache,
+ const ToolSettings *ts)
+{
+  /* Create an array containing all the extractors that need to be executed. */
+ ExtractorRunDatas extractors;
+
+ MeshBufferList *mbuflist = &mbc->buff;
+
+#define EXTRACT_ADD_REQUESTED(type, name) \
+ do { \
+ if (DRW_##type##_requested(mbuflist->type.name)) { \
+ const MeshExtract *extractor = &extract_##name; \
+ extractors.append(extractor); \
+ } \
+ } while (0)
+
+ /* The order in which extractors are added to the list matters somewhat, as some buffers are
+ * reused when building others. */
+ EXTRACT_ADD_REQUESTED(ibo, tris);
+ EXTRACT_ADD_REQUESTED(vbo, pos_nor);
+ EXTRACT_ADD_REQUESTED(vbo, lnor);
+ for (int i = 0; i < GPU_MAX_ATTR; i++) {
+ EXTRACT_ADD_REQUESTED(vbo, attr[i]);
+ }
+
+ /* We use only one extractor for face dots, as the work is done in a single compute shader. */
+ if (DRW_vbo_requested(mbuflist->vbo.fdots_nor) || DRW_vbo_requested(mbuflist->vbo.fdots_pos) ||
+ DRW_ibo_requested(mbuflist->ibo.fdots)) {
+ extractors.append(&extract_fdots_pos);
+ }
+
+ EXTRACT_ADD_REQUESTED(ibo, lines);
+ EXTRACT_ADD_REQUESTED(ibo, edituv_points);
+ EXTRACT_ADD_REQUESTED(ibo, edituv_tris);
+ EXTRACT_ADD_REQUESTED(ibo, edituv_lines);
+ EXTRACT_ADD_REQUESTED(vbo, vert_idx);
+ EXTRACT_ADD_REQUESTED(vbo, edge_idx);
+ EXTRACT_ADD_REQUESTED(vbo, poly_idx);
+ EXTRACT_ADD_REQUESTED(vbo, edge_fac);
+ EXTRACT_ADD_REQUESTED(ibo, points);
+ EXTRACT_ADD_REQUESTED(vbo, edit_data);
+ EXTRACT_ADD_REQUESTED(vbo, edituv_data);
+  /* Make sure UVs are computed before the edituv buffers. */
+ EXTRACT_ADD_REQUESTED(vbo, uv);
+ EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_area);
+ EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle);
+ EXTRACT_ADD_REQUESTED(ibo, lines_adjacency);
+ EXTRACT_ADD_REQUESTED(vbo, vcol);
+ EXTRACT_ADD_REQUESTED(vbo, weights);
+ EXTRACT_ADD_REQUESTED(vbo, sculpt_data);
+
+#undef EXTRACT_ADD_REQUESTED
+
+ if (extractors.is_empty()) {
+ return;
+ }
+
+ MeshRenderData mr;
+ draw_subdiv_init_mesh_render_data(subdiv_cache, &mr, ts);
+ mesh_render_data_update_loose_geom(&mr, mbc, MR_ITER_LEDGE | MR_ITER_LVERT, MR_DATA_LOOSE_GEOM);
+
+ void *data_stack = MEM_mallocN(extractors.data_size_total(), __func__);
+ uint32_t data_offset = 0;
+ for (const ExtractorRunData &run_data : extractors) {
+ const MeshExtract *extractor = run_data.extractor;
+ void *buffer = mesh_extract_buffer_get(extractor, mbuflist);
+ void *data = POINTER_OFFSET(data_stack, data_offset);
+
+ extractor->init_subdiv(subdiv_cache, &mr, cache, buffer, data);
+
+ if (extractor->iter_subdiv) {
+ extractor->iter_subdiv(subdiv_cache, &mr, data);
+ }
+
+ if (extractor->iter_loose_geom_subdiv) {
+ extractor->iter_loose_geom_subdiv(subdiv_cache, &mr, &mbc->loose_geom, buffer, data);
+ }
+
+ if (extractor->finish_subdiv) {
+ extractor->finish_subdiv(subdiv_cache, buffer, data);
+ }
+ }
+ MEM_freeN(data_stack);
+}
+
+/** \} */
+
} // namespace blender::draw
extern "C" {
@@ -818,4 +912,12 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
use_hide);
}
+void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache,
+ MeshBufferCache *mbc,
+ DRWSubdivCache *subdiv_cache,
+ const ToolSettings *ts)
+{
+ blender::draw::mesh_buffer_cache_create_requested_subdiv(cache, mbc, subdiv_cache, ts);
+}
+
} // extern "C"
diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c
index 82b3b5aee41..1e5ffc14911 100644
--- a/source/blender/draw/intern/draw_cache_impl_mesh.c
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.c
@@ -54,6 +54,7 @@
#include "BKE_object_deform.h"
#include "BKE_paint.h"
#include "BKE_pbvh.h"
+#include "BKE_subdiv_modifier.h"
#include "atomic_ops.h"
@@ -69,6 +70,7 @@
#include "draw_cache_extract.h"
#include "draw_cache_inline.h"
+#include "draw_subdivision.h"
#include "draw_cache_impl.h" /* own include */
@@ -380,6 +382,7 @@ static void drw_mesh_attributes_add_request(DRW_MeshAttributes *attrs,
BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me)
{
switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
return &me->ldata;
break;
@@ -395,6 +398,7 @@ BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me)
BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me)
{
switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
return &me->pdata;
break;
@@ -410,6 +414,7 @@ BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me)
BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me)
{
switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
return &me->edata;
break;
@@ -425,6 +430,7 @@ BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me)
BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me)
{
switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+ case ME_WRAPPER_TYPE_SUBD:
case ME_WRAPPER_TYPE_MDATA:
return &me->vdata;
break;
@@ -1037,6 +1043,15 @@ static void mesh_buffer_cache_clear(MeshBufferCache *mbc)
mbc->poly_sorted.visible_tri_len = 0;
}
+static void mesh_batch_cache_free_subdiv_cache(MeshBatchCache *cache)
+{
+ if (cache->subdiv_cache) {
+ draw_subdiv_cache_free(cache->subdiv_cache);
+ MEM_freeN(cache->subdiv_cache);
+ cache->subdiv_cache = NULL;
+ }
+}
+
static void mesh_batch_cache_clear(Mesh *me)
{
MeshBatchCache *cache = me->runtime.batch_cache;
@@ -1064,6 +1079,8 @@ static void mesh_batch_cache_clear(Mesh *me)
cache->batch_ready = 0;
drw_mesh_weight_state_clear(&cache->weight_state);
+
+ mesh_batch_cache_free_subdiv_cache(cache);
}
void DRW_mesh_batch_cache_free(Mesh *me)
@@ -1693,6 +1710,10 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
const bool do_uvcage = is_editmode && !me->edit_mesh->mesh_eval_final->runtime.is_original;
+ const int required_mode = BKE_subsurf_modifier_eval_required_mode(DRW_state_is_scene_render(),
+ is_editmode);
+ const bool do_subdivision = BKE_subsurf_modifier_can_do_gpu_subdiv(scene, ob, required_mode);
+
MeshBufferList *mbuflist = &cache->final.buff;
/* Initialize batches and request VBO's & IBO's. */
@@ -2038,6 +2059,15 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
true);
}
+ if (do_subdivision) {
+ DRW_create_subdivision(scene, ob, me, cache, &cache->final, ts);
+ }
+ else {
+ /* The subsurf modifier may have been recently removed, or another modifier was added after it,
+ * so free any potential subdivision cache as it is not needed anymore. */
+ mesh_batch_cache_free_subdiv_cache(cache);
+ }
+
mesh_buffer_cache_create_requested(task_graph,
cache,
&cache->final,
diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc
new file mode 100644
index 00000000000..5533130212e
--- /dev/null
+++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc
@@ -0,0 +1,1932 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2021, Blender Foundation.
+ */
+
+#include "draw_subdivision.h"
+
+#include "DNA_mesh_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+
+#include "BKE_editmesh.h"
+#include "BKE_modifier.h"
+#include "BKE_object.h"
+#include "BKE_scene.h"
+#include "BKE_subdiv.h"
+#include "BKE_subdiv_eval.h"
+#include "BKE_subdiv_foreach.h"
+#include "BKE_subdiv_mesh.h"
+#include "BKE_subdiv_modifier.h"
+
+#include "BLI_linklist.h"
+
+#include "BLI_string.h"
+
+#include "PIL_time.h"
+
+#include "DRW_engine.h"
+#include "DRW_render.h"
+
+#include "GPU_capabilities.h"
+#include "GPU_compute.h"
+#include "GPU_index_buffer.h"
+#include "GPU_state.h"
+#include "GPU_vertex_buffer.h"
+
+#include "opensubdiv_capi.h"
+#include "opensubdiv_capi_type.h"
+#include "opensubdiv_converter_capi.h"
+#include "opensubdiv_evaluator_capi.h"
+#include "opensubdiv_topology_refiner_capi.h"
+
+#include "draw_cache_extract.h"
+#include "draw_cache_impl.h"
+#include "draw_cache_inline.h"
+#include "mesh_extractors/extract_mesh.h"
+
+extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[];
+extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[];
+extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[];
+extern "C" char datatoc_common_subdiv_lib_glsl[];
+extern "C" char datatoc_common_subdiv_normals_accumulate_comp_glsl[];
+extern "C" char datatoc_common_subdiv_normals_finalize_comp_glsl[];
+extern "C" char datatoc_common_subdiv_patch_evaluation_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edge_fac_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_lnor_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_sculpt_data_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl[];
+extern "C" char datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl[];
+
+enum {
+ SHADER_BUFFER_LINES,
+ SHADER_BUFFER_LINES_LOOSE,
+ SHADER_BUFFER_EDGE_FAC,
+ SHADER_BUFFER_LNOR,
+ SHADER_BUFFER_TRIS,
+ SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS,
+ SHADER_BUFFER_NORMALS_ACCUMULATE,
+ SHADER_BUFFER_NORMALS_FINALIZE,
+ SHADER_PATCH_EVALUATION,
+ SHADER_PATCH_EVALUATION_LIMIT_NORMALS,
+ SHADER_PATCH_EVALUATION_FVAR,
+ SHADER_PATCH_EVALUATION_FACE_DOTS,
+ SHADER_COMP_CUSTOM_DATA_INTERP_1D,
+ SHADER_COMP_CUSTOM_DATA_INTERP_2D,
+ SHADER_COMP_CUSTOM_DATA_INTERP_3D,
+ SHADER_COMP_CUSTOM_DATA_INTERP_4D,
+ SHADER_BUFFER_SCULPT_DATA,
+ SHADER_BUFFER_UV_STRETCH_ANGLE,
+ SHADER_BUFFER_UV_STRETCH_AREA,
+
+ NUM_SHADERS,
+};
+
+static GPUShader *g_subdiv_shaders[NUM_SHADERS];
+
+static const char *get_shader_code(int shader_type)
+{
+ switch (shader_type) {
+ case SHADER_BUFFER_LINES:
+ case SHADER_BUFFER_LINES_LOOSE: {
+ return datatoc_common_subdiv_ibo_lines_comp_glsl;
+ }
+ case SHADER_BUFFER_EDGE_FAC: {
+ return datatoc_common_subdiv_vbo_edge_fac_comp_glsl;
+ }
+ case SHADER_BUFFER_LNOR: {
+ return datatoc_common_subdiv_vbo_lnor_comp_glsl;
+ }
+ case SHADER_BUFFER_TRIS:
+ case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: {
+ return datatoc_common_subdiv_ibo_tris_comp_glsl;
+ }
+ case SHADER_BUFFER_NORMALS_ACCUMULATE: {
+ return datatoc_common_subdiv_normals_accumulate_comp_glsl;
+ }
+ case SHADER_BUFFER_NORMALS_FINALIZE: {
+ return datatoc_common_subdiv_normals_finalize_comp_glsl;
+ }
+ case SHADER_PATCH_EVALUATION:
+ case SHADER_PATCH_EVALUATION_LIMIT_NORMALS:
+ case SHADER_PATCH_EVALUATION_FVAR:
+ case SHADER_PATCH_EVALUATION_FACE_DOTS: {
+ return datatoc_common_subdiv_patch_evaluation_comp_glsl;
+ }
+ case SHADER_COMP_CUSTOM_DATA_INTERP_1D:
+ case SHADER_COMP_CUSTOM_DATA_INTERP_2D:
+ case SHADER_COMP_CUSTOM_DATA_INTERP_3D:
+ case SHADER_COMP_CUSTOM_DATA_INTERP_4D: {
+ return datatoc_common_subdiv_custom_data_interp_comp_glsl;
+ }
+ case SHADER_BUFFER_SCULPT_DATA: {
+ return datatoc_common_subdiv_vbo_sculpt_data_comp_glsl;
+ }
+ case SHADER_BUFFER_UV_STRETCH_ANGLE: {
+ return datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl;
+ }
+ case SHADER_BUFFER_UV_STRETCH_AREA: {
+ return datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl;
+ }
+ }
+ return nullptr;
+}
+
+static const char *get_shader_name(int shader_type)
+{
+ switch (shader_type) {
+ case SHADER_BUFFER_LINES: {
+ return "subdiv lines build";
+ }
+ case SHADER_BUFFER_LINES_LOOSE: {
+ return "subdiv lines loose build";
+ }
+ case SHADER_BUFFER_LNOR: {
+ return "subdiv lnor build";
+ }
+ case SHADER_BUFFER_EDGE_FAC: {
+ return "subdiv edge fac build";
+ }
+ case SHADER_BUFFER_TRIS:
+ case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: {
+ return "subdiv tris";
+ }
+ case SHADER_BUFFER_NORMALS_ACCUMULATE: {
+ return "subdiv normals accumulate";
+ }
+ case SHADER_BUFFER_NORMALS_FINALIZE: {
+ return "subdiv normals finalize";
+ }
+ case SHADER_PATCH_EVALUATION: {
+ return "subdiv patch evaluation";
+ }
+ case SHADER_PATCH_EVALUATION_LIMIT_NORMALS: {
+ return "subdiv patch evaluation limit normals";
+ }
+ case SHADER_PATCH_EVALUATION_FVAR: {
+ return "subdiv patch evaluation face-varying";
+ }
+ case SHADER_PATCH_EVALUATION_FACE_DOTS: {
+ return "subdiv patch evaluation face dots";
+ }
+ case SHADER_COMP_CUSTOM_DATA_INTERP_1D: {
+ return "subdiv custom data interp 1D";
+ }
+ case SHADER_COMP_CUSTOM_DATA_INTERP_2D: {
+ return "subdiv custom data interp 2D";
+ }
+ case SHADER_COMP_CUSTOM_DATA_INTERP_3D: {
+ return "subdiv custom data interp 3D";
+ }
+ case SHADER_COMP_CUSTOM_DATA_INTERP_4D: {
+ return "subdiv custom data interp 4D";
+ }
+ case SHADER_BUFFER_SCULPT_DATA: {
+ return "subdiv sculpt data";
+ }
+ case SHADER_BUFFER_UV_STRETCH_ANGLE: {
+ return "subdiv uv stretch angle";
+ }
+ case SHADER_BUFFER_UV_STRETCH_AREA: {
+ return "subdiv uv stretch area";
+ }
+ }
+ return nullptr;
+}
+
+static GPUShader *get_patch_evaluation_shader(int shader_type)
+{
+ if (g_subdiv_shaders[shader_type] == nullptr) {
+ const char *compute_code = get_shader_code(shader_type);
+
+ const char *defines = nullptr;
+ if (shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS) {
+ defines =
+ "#define OSD_PATCH_BASIS_GLSL\n"
+ "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+ "#define LIMIT_NORMALS\n";
+ }
+ else if (shader_type == SHADER_PATCH_EVALUATION_FVAR) {
+ defines =
+ "#define OSD_PATCH_BASIS_GLSL\n"
+ "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+ "#define FVAR_EVALUATION\n";
+ }
+ else if (shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) {
+ defines =
+ "#define OSD_PATCH_BASIS_GLSL\n"
+ "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"
+ "#define FDOTS_EVALUATION\n";
+ }
+ else {
+ defines =
+ "#define OSD_PATCH_BASIS_GLSL\n"
+ "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
+ }
+
+ /* Merge OpenSubdiv library code with our own library code. */
+ const char *patch_basis_source = openSubdiv_getGLSLPatchBasisSource();
+ const char *subdiv_lib_code = datatoc_common_subdiv_lib_glsl;
+ char *library_code = static_cast<char *>(
+ MEM_mallocN(strlen(patch_basis_source) + strlen(subdiv_lib_code) + 1,
+ "subdiv patch evaluation library code"));
+ library_code[0] = '\0';
+ strcat(library_code, patch_basis_source);
+ strcat(library_code, subdiv_lib_code);
+
+ g_subdiv_shaders[shader_type] = GPU_shader_create_compute(
+ compute_code, library_code, defines, get_shader_name(shader_type));
+
+ MEM_freeN(library_code);
+ }
+
+ return g_subdiv_shaders[shader_type];
+}
+
+static GPUShader *get_subdiv_shader(int shader_type, const char *defines)
+{
+ if (shader_type == SHADER_PATCH_EVALUATION ||
+ shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS ||
+ shader_type == SHADER_PATCH_EVALUATION_FVAR ||
+ shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) {
+ return get_patch_evaluation_shader(shader_type);
+ }
+ if (g_subdiv_shaders[shader_type] == nullptr) {
+ const char *compute_code = get_shader_code(shader_type);
+ g_subdiv_shaders[shader_type] = GPU_shader_create_compute(
+ compute_code, datatoc_common_subdiv_lib_glsl, defines, get_shader_name(shader_type));
+ }
+ return g_subdiv_shaders[shader_type];
+}
+
+/* -------------------------------------------------------------------- */
+/** \name Vertex formats used for data transfer from OpenSubdiv, and for data processing on our
+ * side.
+ * \{ */
+
+static GPUVertFormat *get_uvs_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "uvs", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
+
+/* Vertex format for `OpenSubdiv::Osd::PatchArray`. */
+static GPUVertFormat *get_patch_array_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "regDesc", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "desc", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "numPatches", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "indexBase", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "stride", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "primitiveIdBase", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+/* Vertex format used for the `PatchTable::PatchHandle`. */
+static GPUVertFormat *get_patch_handle_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "vertex_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "array_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "patch_index", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+/* Vertex format used for the quad-tree nodes of the PatchMap. */
+static GPUVertFormat *get_quadtree_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "child", GPU_COMP_U32, 4, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+/* Vertex format for `OpenSubdiv::Osd::PatchParam`. Not really used; it only ensures that
+ * the #GPUVertBuf used to wrap the OpenSubdiv patch param buffer is valid. */
+static GPUVertFormat *get_patch_param_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "data", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
+
+/* Vertex format for the patches' vertices index buffer. */
+static GPUVertFormat *get_patch_index_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "data", GPU_COMP_I32, 1, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+/* Vertex format for the OpenSubdiv vertex buffer. */
+static GPUVertFormat *get_subdiv_vertex_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ /* We use 4 components for the vectors to account for padding in the compute shaders, where
+ * vec3 is promoted to vec4. */
+ GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
+
+typedef struct CompressedPatchCoord {
+ int ptex_face_index;
+ /* UV coordinate encoded as u << 16 | v, where u and v are quantized to 16 bits. */
+ unsigned int encoded_uv;
+} CompressedPatchCoord;
+
+MINLINE CompressedPatchCoord make_patch_coord(int ptex_face_index, float u, float v)
+{
+ CompressedPatchCoord patch_coord = {
+ ptex_face_index,
+ (static_cast<unsigned int>(u * 65535.0f) << 16) | static_cast<unsigned int>(v * 65535.0f),
+ };
+ return patch_coord;
+}
+
+/* Vertex format used for the #CompressedPatchCoord. */
+static GPUVertFormat *get_blender_patch_coords_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ /* WARNING! Adjust #CompressedPatchCoord accordingly. */
+ GPU_vertformat_attr_add(&format, "ptex_face_index", GPU_COMP_U32, 1, GPU_FETCH_INT);
+ GPU_vertformat_attr_add(&format, "uv", GPU_COMP_U32, 1, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+static GPUVertFormat *get_origindex_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "color", GPU_COMP_U32, 1, GPU_FETCH_INT);
+ }
+ return &format;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities to initialize an OpenSubdiv_Buffer for a GPUVertBuf.
+ * \{ */
+
+static void vertbuf_bind_gpu(const OpenSubdiv_Buffer *buffer)
+{
+ GPUVertBuf *verts = (GPUVertBuf *)(buffer->data);
+ GPU_vertbuf_use(verts);
+}
+
+static void *vertbuf_alloc(const OpenSubdiv_Buffer *interface, const uint len)
+{
+ GPUVertBuf *verts = (GPUVertBuf *)(interface->data);
+ GPU_vertbuf_data_alloc(verts, len);
+ return GPU_vertbuf_get_data(verts);
+}
+
+static void vertbuf_device_alloc(const OpenSubdiv_Buffer *interface, const uint len)
+{
+ GPUVertBuf *verts = (GPUVertBuf *)(interface->data);
+ /* This assumes that GPU_USAGE_DEVICE_ONLY was used, which won't allocate host memory. */
+ // BLI_assert(GPU_vertbuf_get_usage(verts) == GPU_USAGE_DEVICE_ONLY);
+ GPU_vertbuf_data_alloc(verts, len);
+}
+
+static void vertbuf_wrap_device_handle(const OpenSubdiv_Buffer *interface, uint64_t handle)
+{
+ GPUVertBuf *verts = (GPUVertBuf *)(interface->data);
+ GPU_vertbuf_wrap_handle(verts, handle);
+}
+
+static void vertbuf_update_data(const OpenSubdiv_Buffer *interface,
+ uint start,
+ uint len,
+ const void *data)
+{
+ GPUVertBuf *verts = (GPUVertBuf *)(interface->data);
+ GPU_vertbuf_update_sub(verts, start, len, data);
+}
+
+static void opensubdiv_gpu_buffer_init(OpenSubdiv_Buffer *buffer_interface, GPUVertBuf *vertbuf)
+{
+ buffer_interface->data = vertbuf;
+ buffer_interface->bind_gpu = vertbuf_bind_gpu;
+ buffer_interface->buffer_offset = 0;
+ buffer_interface->wrap_device_handle = vertbuf_wrap_device_handle;
+ buffer_interface->alloc = vertbuf_alloc;
+ buffer_interface->device_alloc = vertbuf_device_alloc;
+ buffer_interface->device_update = vertbuf_update_data;
+}
+
+static GPUVertBuf *create_buffer_and_interface(OpenSubdiv_Buffer *interface, GPUVertFormat *format)
+{
+ GPUVertBuf *buffer = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(buffer, format, GPU_USAGE_DEVICE_ONLY);
+ opensubdiv_gpu_buffer_init(interface, buffer);
+ return buffer;
+}
+
+/** \} */
+
+// --------------------------------------------------------
+
+static uint tris_count_from_number_of_loops(const uint number_of_loops)
+{
+ const uint32_t number_of_quads = number_of_loops / 4;
+ return number_of_quads * 2;
+}
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities to build a GPUVertBuf from an origindex buffer.
+ * \{ */
+
+void draw_subdiv_init_origindex_buffer(GPUVertBuf *buffer,
+ int *vert_origindex,
+ uint num_loops,
+ uint loose_len)
+{
+ GPU_vertbuf_init_with_format_ex(buffer, get_origindex_format(), GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(buffer, num_loops + loose_len);
+
+ int *vbo_data = (int *)GPU_vertbuf_get_data(buffer);
+ memcpy(vbo_data, vert_origindex, num_loops * sizeof(int));
+}
+
+GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops)
+{
+ GPUVertBuf *buffer = GPU_vertbuf_calloc();
+ draw_subdiv_init_origindex_buffer(buffer, vert_origindex, num_loops, 0);
+ return buffer;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Utilities for DRWPatchMap.
+ * \{ */
+
+static void draw_patch_map_build(DRWPatchMap *gpu_patch_map, Subdiv *subdiv)
+{
+ GPUVertBuf *patch_map_handles = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(patch_map_handles, get_patch_handle_format(), GPU_USAGE_STATIC);
+
+ GPUVertBuf *patch_map_quadtree = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(patch_map_quadtree, get_quadtree_format(), GPU_USAGE_STATIC);
+
+ OpenSubdiv_Buffer patch_map_handles_interface;
+ opensubdiv_gpu_buffer_init(&patch_map_handles_interface, patch_map_handles);
+
+ OpenSubdiv_Buffer patch_map_quad_tree_interface;
+ opensubdiv_gpu_buffer_init(&patch_map_quad_tree_interface, patch_map_quadtree);
+
+ int min_patch_face = 0;
+ int max_patch_face = 0;
+ int max_depth = 0;
+ int patches_are_triangular = 0;
+
+ OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+ evaluator->getPatchMap(evaluator,
+ &patch_map_handles_interface,
+ &patch_map_quad_tree_interface,
+ &min_patch_face,
+ &max_patch_face,
+ &max_depth,
+ &patches_are_triangular);
+
+ gpu_patch_map->patch_map_handles = patch_map_handles;
+ gpu_patch_map->patch_map_quadtree = patch_map_quadtree;
+ gpu_patch_map->min_patch_face = min_patch_face;
+ gpu_patch_map->max_patch_face = max_patch_face;
+ gpu_patch_map->max_depth = max_depth;
+ gpu_patch_map->patches_are_triangular = patches_are_triangular;
+}
+
+static void draw_patch_map_free(DRWPatchMap *gpu_patch_map)
+{
+ GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_handles);
+ GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_quadtree);
+ gpu_patch_map->min_patch_face = 0;
+ gpu_patch_map->max_patch_face = 0;
+ gpu_patch_map->max_depth = 0;
+ gpu_patch_map->patches_are_triangular = 0;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivCache
+ * \{ */
+
+static void draw_subdiv_cache_free_material_data(DRWSubdivCache *cache)
+{
+ GPU_VERTBUF_DISCARD_SAFE(cache->polygon_mat_offset);
+ MEM_SAFE_FREE(cache->mat_start);
+ MEM_SAFE_FREE(cache->mat_end);
+}
+
+static void draw_subdiv_free_edit_mode_cache(DRWSubdivCache *cache)
+{
+ GPU_VERTBUF_DISCARD_SAFE(cache->verts_orig_index);
+ GPU_VERTBUF_DISCARD_SAFE(cache->edges_orig_index);
+ GPU_VERTBUF_DISCARD_SAFE(cache->fdots_patch_coords);
+}
+
+void draw_subdiv_cache_free(DRWSubdivCache *cache)
+{
+ GPU_VERTBUF_DISCARD_SAFE(cache->patch_coords);
+ GPU_VERTBUF_DISCARD_SAFE(cache->face_ptex_offset_buffer);
+ GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_polygon_offset_buffer);
+ GPU_VERTBUF_DISCARD_SAFE(cache->extra_coarse_face_data);
+ MEM_SAFE_FREE(cache->subdiv_loop_subdiv_vert_index);
+ MEM_SAFE_FREE(cache->subdiv_loop_poly_index);
+ MEM_SAFE_FREE(cache->point_indices);
+ MEM_SAFE_FREE(cache->subdiv_polygon_offset);
+ GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency_offsets);
+ GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency);
+ cache->resolution = 0;
+ cache->num_subdiv_loops = 0;
+ cache->num_coarse_poly = 0;
+ cache->num_subdiv_quads = 0;
+ draw_subdiv_free_edit_mode_cache(cache);
+ draw_subdiv_cache_free_material_data(cache);
+ draw_patch_map_free(&cache->gpu_patch_map);
+ if (cache->ubo) {
+ GPU_uniformbuf_free(cache->ubo);
+ cache->ubo = nullptr;
+ }
+}
+
+/* Flags used in #DRWSubdivCache.extra_coarse_face_data. The flags are packed in the upper bits of
+ * each uint (one per coarse face), #SUBDIV_COARSE_FACE_FLAG_OFFSET tells where they are in the
+ * packed bits. */
+#define SUBDIV_COARSE_FACE_FLAG_SMOOTH 1u
+#define SUBDIV_COARSE_FACE_FLAG_SELECT 2u
+#define SUBDIV_COARSE_FACE_FLAG_ACTIVE 4u
+
+#define SUBDIV_COARSE_FACE_FLAG_OFFSET 29u
+
+#define SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK \
+ (SUBDIV_COARSE_FACE_FLAG_SMOOTH << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+#define SUBDIV_COARSE_FACE_FLAG_SELECT_MASK \
+ (SUBDIV_COARSE_FACE_FLAG_SELECT << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+#define SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK \
+ (SUBDIV_COARSE_FACE_FLAG_ACTIVE << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+
+#define SUBDIV_COARSE_FACE_LOOP_START_MASK \
+ ~((SUBDIV_COARSE_FACE_FLAG_SMOOTH | SUBDIV_COARSE_FACE_FLAG_SELECT | \
+ SUBDIV_COARSE_FACE_FLAG_ACTIVE) \
+ << SUBDIV_COARSE_FACE_FLAG_OFFSET)
+
+static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cache, Mesh *mesh)
+{
+ if (cache->extra_coarse_face_data == nullptr) {
+ cache->extra_coarse_face_data = GPU_vertbuf_calloc();
+ static GPUVertFormat format;
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "data", GPU_COMP_U32, 1, GPU_FETCH_INT);
+ }
+ GPU_vertbuf_init_with_format_ex(cache->extra_coarse_face_data, &format, GPU_USAGE_DYNAMIC);
+ GPU_vertbuf_data_alloc(cache->extra_coarse_face_data, mesh->totpoly);
+ }
+
+ uint32_t *flags_data = (uint32_t *)(GPU_vertbuf_get_data(cache->extra_coarse_face_data));
+
+ if (cache->bm) {
+ BMesh *bm = cache->bm;
+ BMFace *f;
+ BMIter iter;
+
+ /* Gather the flags and loop start index for each coarse face. */
+ BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) {
+ const int index = BM_elem_index_get(f);
+ uint32_t flag = 0;
+ if (BM_elem_flag_test(f, BM_ELEM_SMOOTH)) {
+ flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH;
+ }
+ if (BM_elem_flag_test(f, BM_ELEM_SELECT)) {
+ flag |= SUBDIV_COARSE_FACE_FLAG_SELECT;
+ }
+ if (f == bm->act_face) {
+ flag |= SUBDIV_COARSE_FACE_FLAG_ACTIVE;
+ }
+ const int loopstart = BM_elem_index_get(f->l_first);
+ flags_data[index] = (uint)(loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET);
+ }
+ }
+ else {
+ for (int i = 0; i < mesh->totpoly; i++) {
+ uint32_t flag = 0;
+ if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) {
+ flag = SUBDIV_COARSE_FACE_FLAG_SMOOTH;
+ }
+ flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET);
+ }
+ }
+
+ /* Make sure updated data is re-uploaded. */
+ GPU_vertbuf_tag_dirty(cache->extra_coarse_face_data);
+}
+
+static DRWSubdivCache *mesh_batch_cache_ensure_subdiv_cache(MeshBatchCache *mbc)
+{
+ DRWSubdivCache *subdiv_cache = mbc->subdiv_cache;
+ if (subdiv_cache == nullptr) {
+ subdiv_cache = static_cast<DRWSubdivCache *>(
+ MEM_callocN(sizeof(DRWSubdivCache), "DRWSubdivCache"));
+ }
+ mbc->subdiv_cache = subdiv_cache;
+ return subdiv_cache;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Subdivision grid traversal.
+ *
+ * Traverse the uniform subdivision grid over coarse faces and gather useful information for
+ * building the draw buffers on the GPU. We primarily gather the patch coordinates for all
+ * subdivision faces, as well as the original coarse indices for each subdivision element (vertex,
+ * face, or edge) which directly maps to its coarse counterpart (note that all subdivision faces
+ * map to a coarse face). This information will then be cached in #DRWSubdivCache for subsequent
+ * reevaluations, as long as the topology does not change.
+ * \{ */
+
+typedef struct DRWCacheBuildingContext {
+ const Mesh *coarse_mesh;
+ const SubdivToMeshSettings *settings;
+
+ DRWSubdivCache *cache;
+
+ /* Pointers into DRWSubdivCache buffers for easier access during traversal. */
+ CompressedPatchCoord *patch_coords;
+ int *subdiv_loop_vert_index;
+ int *subdiv_loop_subdiv_vert_index;
+ int *subdiv_loop_edge_index;
+ int *subdiv_loop_poly_index;
+ int *point_indices;
+
+ /* Temporary buffers used during traversal. */
+ int *vert_origindex_map;
+ int *edge_origindex_map;
+
+ /* Origindex layers from the mesh, used to directly look up the base mesh's origindex during
+ * traversal for edit data, so that we do not have to handle yet another GPU buffer and do this
+ * in the shaders. */
+ int *v_origindex;
+ int *e_origindex;
+} DRWCacheBuildingContext;
+
+static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context,
+ const int num_vertices,
+ const int num_edges,
+ const int num_loops,
+ const int num_polygons,
+ const int *subdiv_polygon_offset)
+{
+ if (num_loops == 0) {
+ return false;
+ }
+
+ DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+ DRWSubdivCache *cache = ctx->cache;
+
+ /* Set topology information. */
+ cache->num_subdiv_edges = (uint)num_edges;
+ cache->num_subdiv_loops = (uint)num_loops;
+ cache->num_subdiv_verts = (uint)num_vertices;
+ cache->num_subdiv_quads = (uint)num_polygons;
+ cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset));
+
+ /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after
+ * it was sent to the device, since we may use the data while building other buffers on the CPU
+ * side. */
+ cache->patch_coords = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(
+ cache->patch_coords, get_blender_patch_coords_format(), GPU_USAGE_DYNAMIC);
+ GPU_vertbuf_data_alloc(cache->patch_coords, cache->num_subdiv_loops);
+
+ cache->verts_orig_index = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(
+ cache->verts_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC);
+ GPU_vertbuf_data_alloc(cache->verts_orig_index, cache->num_subdiv_loops);
+
+ cache->edges_orig_index = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format_ex(
+ cache->edges_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC);
+ GPU_vertbuf_data_alloc(cache->edges_orig_index, cache->num_subdiv_loops);
+
+ cache->subdiv_loop_subdiv_vert_index = static_cast<int *>(
+ MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_subdiv_vert_index"));
+
+ cache->subdiv_loop_poly_index = static_cast<int *>(
+ MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_poly_index"));
+
+ cache->point_indices = static_cast<int *>(
+ MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "point_indices"));
+ for (int i = 0; i < num_vertices; i++) {
+ cache->point_indices[i] = -1;
+ }
+
+ /* Initialize context pointers and temporary buffers. */
+ ctx->patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(cache->patch_coords);
+ ctx->subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(cache->verts_orig_index);
+ ctx->subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(cache->edges_orig_index);
+ ctx->subdiv_loop_subdiv_vert_index = cache->subdiv_loop_subdiv_vert_index;
+ ctx->subdiv_loop_poly_index = cache->subdiv_loop_poly_index;
+ ctx->point_indices = cache->point_indices;
+
+ ctx->v_origindex = static_cast<int *>(
+ CustomData_get_layer(&ctx->coarse_mesh->vdata, CD_ORIGINDEX));
+
+ ctx->e_origindex = static_cast<int *>(
+ CustomData_get_layer(&ctx->coarse_mesh->edata, CD_ORIGINDEX));
+
+ ctx->vert_origindex_map = static_cast<int *>(
+ MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map"));
+ for (int i = 0; i < num_vertices; i++) {
+ ctx->vert_origindex_map[i] = -1;
+ }
+
+ ctx->edge_origindex_map = static_cast<int *>(
+ MEM_mallocN(cache->num_subdiv_edges * sizeof(int), "subdiv_edge_origindex_map"));
+ for (int i = 0; i < num_edges; i++) {
+ ctx->edge_origindex_map[i] = -1;
+ }
+
+ return true;
+}
+
+static void draw_subdiv_vertex_corner_cb(const SubdivForeachContext *foreach_context,
+ void *UNUSED(tls),
+ const int UNUSED(ptex_face_index),
+ const float UNUSED(u),
+ const float UNUSED(v),
+ const int coarse_vertex_index,
+ const int UNUSED(coarse_poly_index),
+ const int UNUSED(coarse_corner),
+ const int subdiv_vertex_index)
+{
+ BLI_assert(coarse_vertex_index != ORIGINDEX_NONE);
+ DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+ ctx->vert_origindex_map[subdiv_vertex_index] = coarse_vertex_index;
+}
+
+static void draw_subdiv_vertex_edge_cb(const SubdivForeachContext *UNUSED(foreach_context),
+ void *UNUSED(tls_v),
+ const int UNUSED(ptex_face_index),
+ const float UNUSED(u),
+ const float UNUSED(v),
+ const int UNUSED(coarse_edge_index),
+ const int UNUSED(coarse_poly_index),
+ const int UNUSED(coarse_corner),
+ const int UNUSED(subdiv_vertex_index))
+{
+ /* Required if SubdivForeachContext.vertex_corner is also set. */
+}
+
+static void draw_subdiv_edge_cb(const SubdivForeachContext *foreach_context,
+ void *UNUSED(tls),
+ const int coarse_edge_index,
+ const int subdiv_edge_index,
+ const int UNUSED(subdiv_v1),
+ const int UNUSED(subdiv_v2))
+{
+ DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+
+ int coarse_index = coarse_edge_index;
+
+ if (coarse_index != -1) {
+ if (ctx->e_origindex) {
+ coarse_index = ctx->e_origindex[coarse_index];
+ }
+ }
+
+ ctx->edge_origindex_map[subdiv_edge_index] = coarse_index;
+}
+
+static void draw_subdiv_loop_cb(const SubdivForeachContext *foreach_context,
+ void *UNUSED(tls_v),
+ const int ptex_face_index,
+ const float u,
+ const float v,
+ const int UNUSED(coarse_loop_index),
+ const int coarse_poly_index,
+ const int UNUSED(coarse_corner),
+ const int subdiv_loop_index,
+ const int subdiv_vertex_index,
+ const int subdiv_edge_index)
+{
+ DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data);
+ ctx->patch_coords[subdiv_loop_index] = make_patch_coord(ptex_face_index, u, v);
+
+ int coarse_vertex_index = ctx->vert_origindex_map[subdiv_vertex_index];
+
+ if (coarse_vertex_index != -1) {
+ if (ctx->v_origindex) {
+ coarse_vertex_index = ctx->v_origindex[coarse_vertex_index];
+ }
+
+ /* Double check as v_origindex may have modified the index. */
+ if (coarse_vertex_index != -1) {
+ ctx->point_indices[coarse_vertex_index] = subdiv_loop_index;
+ }
+ }
+
+ ctx->subdiv_loop_subdiv_vert_index[subdiv_loop_index] = subdiv_vertex_index;
+ /* For now, store the subdivision edge index; it will be replaced by the actual coarse edge
+ * index at the end of the traversal, as some edges are only traversed then. */
+ ctx->subdiv_loop_edge_index[subdiv_loop_index] = subdiv_edge_index;
+ ctx->subdiv_loop_poly_index[subdiv_loop_index] = coarse_poly_index;
+ ctx->subdiv_loop_vert_index[subdiv_loop_index] = coarse_vertex_index;
+}
+
+static void draw_subdiv_foreach_callbacks(SubdivForeachContext *foreach_context)
+{
+ memset(foreach_context, 0, sizeof(*foreach_context));
+ foreach_context->topology_info = draw_subdiv_topology_info_cb;
+ foreach_context->loop = draw_subdiv_loop_cb;
+ foreach_context->edge = draw_subdiv_edge_cb;
+ foreach_context->vertex_corner = draw_subdiv_vertex_corner_cb;
+ foreach_context->vertex_edge = draw_subdiv_vertex_edge_cb;
+}
+
+static void do_subdiv_traversal(DRWCacheBuildingContext *cache_building_context, Subdiv *subdiv)
+{
+ SubdivForeachContext foreach_context;
+ draw_subdiv_foreach_callbacks(&foreach_context);
+ foreach_context.user_data = cache_building_context;
+
+ BKE_subdiv_foreach_subdiv_geometry(subdiv,
+ &foreach_context,
+ cache_building_context->settings,
+ cache_building_context->coarse_mesh);
+
+ /* Now that traversal is done, we can set up the right original indices for the loop-to-edge map.
+ */
+ for (int i = 0; i < cache_building_context->cache->num_subdiv_loops; i++) {
+ cache_building_context->subdiv_loop_edge_index[i] =
+ cache_building_context
+ ->edge_origindex_map[cache_building_context->subdiv_loop_edge_index[i]];
+ }
+}
+
+static GPUVertBuf *gpu_vertbuf_create_from_format(GPUVertFormat *format, uint len)
+{
+ GPUVertBuf *verts = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format(verts, format);
+ GPU_vertbuf_data_alloc(verts, len);
+ return verts;
+}
+
+/* Build maps to hold enough information to tell which face is adjacent to which vertex; those will
+ * be used for computing normals if limit surfaces are unavailable. */
+static void build_vertex_face_adjacency_maps(DRWSubdivCache *cache)
+{
+ /* +1 so that we do not require a special case for the last vertex; this extra offset will
+ * contain the total number of adjacent faces. */
+ cache->subdiv_vertex_face_adjacency_offsets = gpu_vertbuf_create_from_format(
+ get_origindex_format(), cache->num_subdiv_verts + 1);
+
+ int *vertex_offsets = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency_offsets);
+ memset(vertex_offsets, 0, sizeof(int) * (cache->num_subdiv_verts + 1));
+
+ for (int i = 0; i < cache->num_subdiv_loops; i++) {
+ vertex_offsets[cache->subdiv_loop_subdiv_vert_index[i]]++;
+ }
+
+ int ofs = vertex_offsets[0];
+ vertex_offsets[0] = 0;
+ for (uint i = 1; i < cache->num_subdiv_verts + 1; i++) {
+ int tmp = vertex_offsets[i];
+ vertex_offsets[i] = ofs;
+ ofs += tmp;
+ }
+
+ cache->subdiv_vertex_face_adjacency = gpu_vertbuf_create_from_format(get_origindex_format(),
+ cache->num_subdiv_loops);
+ int *adjacent_faces = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency);
+ int *tmp_set_faces = static_cast<int *>(
+ MEM_callocN(sizeof(int) * cache->num_subdiv_verts, "tmp subdiv vertex offset"));
+
+ for (int i = 0; i < cache->num_subdiv_loops / 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ const int subdiv_vertex = cache->subdiv_loop_subdiv_vert_index[i * 4 + j];
+ int first_face_offset = vertex_offsets[subdiv_vertex] + tmp_set_faces[subdiv_vertex];
+ adjacent_faces[first_face_offset] = i;
+ tmp_set_faces[subdiv_vertex] += 1;
+ }
+ }
+
+ MEM_freeN(tmp_set_faces);
+}
+
+static bool draw_subdiv_build_cache(DRWSubdivCache *cache,
+ Subdiv *subdiv,
+ Mesh *mesh_eval,
+ const Scene *scene,
+ const SubsurfModifierData *smd,
+ const bool is_final_render)
+{
+ const int level = get_render_subsurf_level(&scene->r, smd->levels, is_final_render);
+ SubdivToMeshSettings to_mesh_settings;
+ to_mesh_settings.resolution = (1 << level) + 1;
+ to_mesh_settings.use_optimal_display = false;
+
+ if (cache->resolution != to_mesh_settings.resolution) {
+ /* Resolution changed, we need to rebuild, free any existing cached data. */
+ draw_subdiv_cache_free(cache);
+ }
+
+  /* If the resolutions of the cache and the settings match, check whether the patch
+   * coordinates were already generated. Those coordinates are specific to the resolution, so
+   * they should be null either after initialization, or after freeing if the resolution (or some
+   * other subdivision setting) changed.
+   */
+ if (cache->patch_coords != nullptr) {
+ return true;
+ }
+
+ DRWCacheBuildingContext cache_building_context;
+ cache_building_context.coarse_mesh = mesh_eval;
+ cache_building_context.settings = &to_mesh_settings;
+ cache_building_context.cache = cache;
+
+ do_subdiv_traversal(&cache_building_context, subdiv);
+ if (cache->num_subdiv_loops == 0) {
+ /* Either the traversal failed, or we have an empty mesh, either way we cannot go any further.
+ * The subdiv_polygon_offset cannot then be reliably stored in the cache, so free it directly.
+ */
+ MEM_SAFE_FREE(cache->subdiv_polygon_offset);
+ return false;
+ }
+
+ /* Build buffers for the PatchMap. */
+ draw_patch_map_build(&cache->gpu_patch_map, subdiv);
+
+ cache->face_ptex_offset = BKE_subdiv_face_ptex_offset_get(subdiv);
+
+  /* Build patch coordinates for all the face dots. */
+ cache->fdots_patch_coords = gpu_vertbuf_create_from_format(get_blender_patch_coords_format(),
+ mesh_eval->totpoly);
+ CompressedPatchCoord *blender_fdots_patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(
+ cache->fdots_patch_coords);
+ for (int i = 0; i < mesh_eval->totpoly; i++) {
+ const int ptex_face_index = cache->face_ptex_offset[i];
+ if (mesh_eval->mpoly[i].totloop == 4) {
+ /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. */
+ blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f);
+ }
+ else {
+ /* For N-gons, since they are split into quads from the center, and since the center is
+ * chosen to be the top right corner of each quad, the center coordinate of the coarse face
+ * is any one of those top right corners with `u = v = 1.0`. */
+ blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 1.0f, 1.0f);
+ }
+ }
+
+ cache->resolution = to_mesh_settings.resolution;
+
+ cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer(
+ cache->subdiv_polygon_offset, mesh_eval->totpoly);
+
+ cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset,
+ mesh_eval->totpoly + 1);
+ cache->num_coarse_poly = mesh_eval->totpoly;
+ cache->point_indices = cache_building_context.point_indices;
+
+ build_vertex_face_adjacency_maps(cache);
+
+ /* Cleanup. */
+ MEM_freeN(cache_building_context.vert_origindex_map);
+ MEM_freeN(cache_building_context.edge_origindex_map);
+
+ return true;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivUboStorage.
+ *
+ * Common uniforms for the various shaders.
+ * \{ */
+
+typedef struct DRWSubdivUboStorage {
+ /* Offsets in the buffers data where the source and destination data start. */
+ int src_offset;
+ int dst_offset;
+
+ /* Parameters for the DRWPatchMap. */
+ int min_patch_face;
+ int max_patch_face;
+ int max_depth;
+ int patches_are_triangular;
+
+ /* Coarse topology information. */
+ int coarse_poly_count;
+ uint edge_loose_offset;
+
+ /* Refined topology information. */
+ uint num_subdiv_loops;
+
+  /* Subdivision settings: declared as int in C, but bool in the GLSL code. Since bools have the
+   * same size as ints there, using int in C ensures that the size of the structure matches what
+   * GLSL expects. */
+ int optimal_display;
+
+ /* The sculpt mask data layer may be null. */
+ int has_sculpt_mask;
+
+ /* Masks for the extra coarse face data. */
+ uint coarse_face_select_mask;
+ uint coarse_face_smooth_mask;
+ uint coarse_face_active_mask;
+ uint coarse_face_loopstart_mask;
+
+  /* Number of elements to process in the compute shader (can be the coarse quad count, or the
+   * final vertex count, depending on which compute pass we do). This is used to early out in
+   * case of out-of-bounds accesses, as compute dispatches are of fixed size. */
+ uint total_dispatch_size;
+} DRWSubdivUboStorage;
+
+static_assert((sizeof(DRWSubdivUboStorage) % 16) == 0,
+ "DRWSubdivUboStorage is not padded to a multiple of the size of vec4");
+
+static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache,
+ DRWSubdivUboStorage *ubo,
+ const int src_offset,
+ const int dst_offset,
+ const uint total_dispatch_size,
+ const bool has_sculpt_mask)
+{
+ ubo->src_offset = src_offset;
+ ubo->dst_offset = dst_offset;
+ ubo->min_patch_face = cache->gpu_patch_map.min_patch_face;
+ ubo->max_patch_face = cache->gpu_patch_map.max_patch_face;
+ ubo->max_depth = cache->gpu_patch_map.max_depth;
+ ubo->patches_are_triangular = cache->gpu_patch_map.patches_are_triangular;
+ ubo->coarse_poly_count = cache->num_coarse_poly;
+ ubo->optimal_display = cache->optimal_display;
+ ubo->num_subdiv_loops = cache->num_subdiv_loops;
+ ubo->edge_loose_offset = cache->num_subdiv_loops * 2;
+ ubo->has_sculpt_mask = has_sculpt_mask;
+ ubo->coarse_face_smooth_mask = SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK;
+ ubo->coarse_face_select_mask = SUBDIV_COARSE_FACE_FLAG_SELECT_MASK;
+ ubo->coarse_face_active_mask = SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK;
+ ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK;
+ ubo->total_dispatch_size = total_dispatch_size;
+}
+
+static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache,
+ GPUShader *shader,
+ const int src_offset,
+ const int dst_offset,
+ const uint total_dispatch_size,
+ const bool has_sculpt_mask = false)
+{
+ DRWSubdivUboStorage storage;
+ draw_subdiv_init_ubo_storage(
+ cache, &storage, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask);
+
+ if (!cache->ubo) {
+ const_cast<DRWSubdivCache *>(cache)->ubo = GPU_uniformbuf_create_ex(
+ sizeof(DRWSubdivUboStorage), &storage, "DRWSubdivUboStorage");
+ }
+
+ GPU_uniformbuf_update(cache->ubo, &storage);
+
+ const int location = GPU_shader_get_uniform_block(shader, "shader_data");
+ GPU_uniformbuf_bind(cache->ubo, location);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+
+#define SUBDIV_LOCAL_WORK_GROUP_SIZE 64
+static uint get_dispatch_size(uint elements)
+{
+ return divide_ceil_u(elements, SUBDIV_LOCAL_WORK_GROUP_SIZE);
+}
+
+/* Helper to ensure that the UBO is always initialized before dispatching computes and that the
+ * number of elements that need to be processed is used for the UBO and the dispatch size.
+ * Use this instead of a raw call to #GPU_compute_dispatch. */
+static void drw_subdiv_compute_dispatch(const DRWSubdivCache *cache,
+ GPUShader *shader,
+ const int src_offset,
+ const int dst_offset,
+ uint total_dispatch_size,
+ const bool has_sculpt_mask = false)
+{
+ const uint max_res_x = static_cast<uint>(GPU_max_work_group_count(0));
+
+ const uint dispatch_size = get_dispatch_size(total_dispatch_size);
+ uint dispatch_rx = dispatch_size;
+ uint dispatch_ry = 1u;
+ if (dispatch_rx > max_res_x) {
+    /* Since there are some limitations with regards to the maximum work group count (could be as
+     * low as 64k groups per call), we split the number of elements into a "2d" number, with the
+     * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum
+     * work group count of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements
+     * total, which should be enough. If not, we could also use the 3rd dimension. */
+ /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and
+ * get the smallest rect fitting the requirements. */
+ dispatch_rx = dispatch_ry = ceilf(sqrtf(dispatch_size));
+ /* Avoid a completely empty dispatch line caused by rounding. */
+ if ((dispatch_rx * (dispatch_ry - 1)) >= dispatch_size) {
+ dispatch_ry -= 1;
+ }
+ }
+
+ /* X and Y dimensions may have different limits so the above computation may not be right, but
+ * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore,
+ * we presume it all fits. */
+ BLI_assert(dispatch_ry < static_cast<uint>(GPU_max_work_group_count(1)));
+
+ draw_subdiv_ubo_update_and_bind(
+ cache, shader, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask);
+
+ GPU_compute_dispatch(shader, dispatch_rx, dispatch_ry, 1);
+}
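The 1D-to-2D split above can be isolated into a small helper for illustration. A sketch with a hypothetical `split_dispatch` name, taking the per-axis group limit as a plain parameter instead of querying the GPU:

```cpp
#include <cmath>

/* Split a 1D group count into (x, y) so that x stays within `max_groups_x`;
 * the shader reconstructs the flat index as `x + y * row_width`. Mirrors the
 * rounding logic in drw_subdiv_compute_dispatch() above. */
static void split_dispatch(unsigned groups, unsigned max_groups_x, unsigned *rx, unsigned *ry)
{
  *rx = groups;
  *ry = 1u;
  if (*rx > max_groups_x) {
    *rx = *ry = unsigned(std::ceil(std::sqrt(double(groups))));
    /* Rounding up the square root can leave a fully unused row; trim it. */
    if ((*rx * (*ry - 1)) >= groups) {
      *ry -= 1;
    }
  }
}
```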
+
+void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache,
+ GPUVertBuf *pos_nor,
+ const bool do_limit_normals)
+{
+ Subdiv *subdiv = cache->subdiv;
+ OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+ OpenSubdiv_Buffer src_buffer_interface;
+ GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface,
+ get_subdiv_vertex_format());
+ evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface);
+
+ OpenSubdiv_Buffer patch_arrays_buffer_interface;
+ GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+ get_patch_array_format());
+ evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface);
+
+ OpenSubdiv_Buffer patch_index_buffer_interface;
+ GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+ get_patch_index_format());
+ evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface);
+
+ OpenSubdiv_Buffer patch_param_buffer_interface;
+ GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+ get_patch_param_format());
+ evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface);
+
+ GPUShader *shader = get_patch_evaluation_shader(
+ do_limit_normals ? SHADER_PATCH_EVALUATION_LIMIT_NORMALS : SHADER_PATCH_EVALUATION);
+ GPU_shader_bind(shader);
+
+ GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+ GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+ GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+ GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+ GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+ GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+ GPU_vertbuf_bind_as_ssbo(pos_nor, 8);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+ * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+ * needed. */
+ GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+
+ GPU_vertbuf_discard(patch_index_buffer);
+ GPU_vertbuf_discard(patch_param_buffer);
+ GPU_vertbuf_discard(patch_arrays_buffer);
+ GPU_vertbuf_discard(src_buffer);
+}
+
+void draw_subdiv_extract_uvs(const DRWSubdivCache *cache,
+ GPUVertBuf *uvs,
+ const int face_varying_channel,
+ const int dst_offset)
+{
+ Subdiv *subdiv = cache->subdiv;
+ OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+ OpenSubdiv_Buffer src_buffer_interface;
+ GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, get_uvs_format());
+ evaluator->wrapFVarSrcBuffer(evaluator, face_varying_channel, &src_buffer_interface);
+
+ OpenSubdiv_Buffer patch_arrays_buffer_interface;
+ GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+ get_patch_array_format());
+ evaluator->fillFVarPatchArraysBuffer(
+ evaluator, face_varying_channel, &patch_arrays_buffer_interface);
+
+ OpenSubdiv_Buffer patch_index_buffer_interface;
+ GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+ get_patch_index_format());
+ evaluator->wrapFVarPatchIndexBuffer(
+ evaluator, face_varying_channel, &patch_index_buffer_interface);
+
+ OpenSubdiv_Buffer patch_param_buffer_interface;
+ GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+ get_patch_param_format());
+ evaluator->wrapFVarPatchParamBuffer(
+ evaluator, face_varying_channel, &patch_param_buffer_interface);
+
+ GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FVAR);
+ GPU_shader_bind(shader);
+
+ GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+ GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+ GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+ GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+ GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+ GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+ GPU_vertbuf_bind_as_ssbo(uvs, 8);
+
+  /* The buffer offset has the stride baked in (which is 2 as we have UVs), so remove the stride
+   * by dividing by 2. */
+ const int src_offset = src_buffer_interface.buffer_offset / 2;
+ drw_subdiv_compute_dispatch(cache, shader, src_offset, dst_offset, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array.
+ * Since it may also be used for computing UV stretches, we also need a barrier on the shader
+ * storage. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_SHADER_STORAGE);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+
+ GPU_vertbuf_discard(patch_index_buffer);
+ GPU_vertbuf_discard(patch_param_buffer);
+ GPU_vertbuf_discard(patch_arrays_buffer);
+ GPU_vertbuf_discard(src_buffer);
+}
+
+void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache,
+ GPUVertBuf *src_data,
+ GPUVertBuf *dst_data,
+ int dimensions,
+ int dst_offset)
+{
+ GPUShader *shader = nullptr;
+
+ if (dimensions == 1) {
+ shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D,
+ "#define SUBDIV_POLYGON_OFFSET\n"
+ "#define DIMENSIONS 1\n");
+ }
+ else if (dimensions == 2) {
+ shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_2D,
+ "#define SUBDIV_POLYGON_OFFSET\n"
+ "#define DIMENSIONS 2\n");
+ }
+ else if (dimensions == 3) {
+ shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_3D,
+ "#define SUBDIV_POLYGON_OFFSET\n"
+ "#define DIMENSIONS 3\n");
+ }
+ else if (dimensions == 4) {
+ shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_4D,
+ "#define SUBDIV_POLYGON_OFFSET\n"
+ "#define DIMENSIONS 4\n"
+ "#define GPU_FETCH_U16_TO_FLOAT\n");
+ }
+  else {
+    /* Crash in debug builds if the number of dimensions is not supported. */
+    BLI_assert(0);
+  }
+
+ GPU_shader_bind(shader);
+
+ /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+ GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+ GPU_vertbuf_bind_as_ssbo(src_data, 1);
+ GPU_vertbuf_bind_as_ssbo(cache->face_ptex_offset_buffer, 2);
+ GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3);
+ GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 4);
+ GPU_vertbuf_bind_as_ssbo(dst_data, 5);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, dst_offset, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache,
+ GPUVertBuf *mask_vbo,
+ GPUVertBuf *face_set_vbo,
+ GPUVertBuf *sculpt_data)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_SCULPT_DATA, nullptr);
+ GPU_shader_bind(shader);
+
+ if (mask_vbo) {
+ GPU_vertbuf_bind_as_ssbo(mask_vbo, 0);
+ }
+
+ GPU_vertbuf_bind_as_ssbo(face_set_vbo, 1);
+ GPU_vertbuf_bind_as_ssbo(sculpt_data, 2);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads, mask_vbo != nullptr);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache,
+ GPUVertBuf *pos_nor,
+ GPUVertBuf *face_adjacency_offsets,
+ GPUVertBuf *face_adjacency_lists,
+ GPUVertBuf *vertex_normals)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_ACCUMULATE, nullptr);
+ GPU_shader_bind(shader);
+
+ int binding_point = 0;
+
+ GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++);
+ GPU_vertbuf_bind_as_ssbo(face_adjacency_offsets, binding_point++);
+ GPU_vertbuf_bind_as_ssbo(face_adjacency_lists, binding_point++);
+ GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_verts);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+ * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+ * needed. */
+ GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_finalize_normals(const DRWSubdivCache *cache,
+ GPUVertBuf *vertex_normals,
+ GPUVertBuf *subdiv_loop_subdiv_vert_index,
+ GPUVertBuf *pos_nor)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_FINALIZE, nullptr);
+ GPU_shader_bind(shader);
+
+ int binding_point = 0;
+ GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++);
+ GPU_vertbuf_bind_as_ssbo(subdiv_loop_subdiv_vert_index, binding_point++);
+ GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We
+ * also need it for subsequent compute shaders, so a barrier on the shader storage is also
+ * needed. */
+ GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache,
+ GPUIndexBuf *subdiv_tris,
+ const int material_count)
+{
+ const bool do_single_material = material_count <= 1;
+
+ const char *defines = "#define SUBDIV_POLYGON_OFFSET\n";
+ if (do_single_material) {
+ defines =
+ "#define SUBDIV_POLYGON_OFFSET\n"
+ "#define SINGLE_MATERIAL\n";
+ }
+
+ GPUShader *shader = get_subdiv_shader(
+ do_single_material ? SHADER_BUFFER_TRIS : SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS, defines);
+ GPU_shader_bind(shader);
+
+ /* Outputs */
+ GPU_indexbuf_bind_as_ssbo(subdiv_tris, 1);
+
+ if (!do_single_material) {
+ GPU_vertbuf_bind_as_ssbo(cache->polygon_mat_offset, 2);
+ /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+ GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+ }
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates an index buffer, so we need to put a barrier on the element array. */
+ GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache,
+ GPUVertBuf *fdots_pos,
+ GPUVertBuf *fdots_nor,
+ GPUIndexBuf *fdots_indices)
+{
+ Subdiv *subdiv = cache->subdiv;
+ OpenSubdiv_Evaluator *evaluator = subdiv->evaluator;
+
+ OpenSubdiv_Buffer src_buffer_interface;
+ GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface,
+ get_subdiv_vertex_format());
+ evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface);
+
+ OpenSubdiv_Buffer patch_arrays_buffer_interface;
+ GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface,
+ get_patch_array_format());
+ opensubdiv_gpu_buffer_init(&patch_arrays_buffer_interface, patch_arrays_buffer);
+ evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface);
+
+ OpenSubdiv_Buffer patch_index_buffer_interface;
+ GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface,
+ get_patch_index_format());
+ evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface);
+
+ OpenSubdiv_Buffer patch_param_buffer_interface;
+ GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface,
+ get_patch_param_format());
+ evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface);
+
+ GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FACE_DOTS);
+ GPU_shader_bind(shader);
+
+ GPU_vertbuf_bind_as_ssbo(src_buffer, 0);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1);
+ GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2);
+ GPU_vertbuf_bind_as_ssbo(cache->fdots_patch_coords, 3);
+ GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4);
+ GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5);
+ GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6);
+ GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7);
+ GPU_vertbuf_bind_as_ssbo(fdots_pos, 8);
+ GPU_vertbuf_bind_as_ssbo(fdots_nor, 9);
+ GPU_indexbuf_bind_as_ssbo(fdots_indices, 10);
+ GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 11);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_coarse_poly);
+
+ /* This generates two vertex buffers and an index buffer, so we need to put a barrier on the
+ * vertex attributes and element arrays. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_ELEMENT_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+
+ GPU_vertbuf_discard(patch_index_buffer);
+ GPU_vertbuf_discard(patch_param_buffer);
+ GPU_vertbuf_discard(patch_arrays_buffer);
+ GPU_vertbuf_discard(src_buffer);
+}
+
+void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, GPUIndexBuf *lines_indices)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES, nullptr);
+ GPU_shader_bind(shader);
+
+ GPU_vertbuf_bind_as_ssbo(cache->edges_orig_index, 0);
+ GPU_indexbuf_bind_as_ssbo(lines_indices, 1);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates an index buffer, so we need to put a barrier on the element array. */
+ GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache,
+ GPUIndexBuf *lines_indices,
+ uint num_loose_edges)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES_LOOSE, "#define LINES_LOOSE\n");
+ GPU_shader_bind(shader);
+
+ GPU_indexbuf_bind_as_ssbo(lines_indices, 1);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, num_loose_edges);
+
+ /* This generates an index buffer, so we need to put a barrier on the element array. */
+ GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache,
+ GPUVertBuf *pos_nor,
+ GPUVertBuf *edge_idx,
+ GPUVertBuf *edge_fac)
+{
+ /* No separate shader for the AMD driver case as we assume that the GPU will not change during
+ * the execution of the program. */
+ const char *defines = GPU_crappy_amd_driver() ? "#define GPU_AMD_DRIVER_BYTE_BUG\n" : nullptr;
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_EDGE_FAC, defines);
+ GPU_shader_bind(shader);
+
+ GPU_vertbuf_bind_as_ssbo(pos_nor, 0);
+ GPU_vertbuf_bind_as_ssbo(edge_idx, 1);
+ GPU_vertbuf_bind_as_ssbo(edge_fac, 2);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache,
+ GPUVertBuf *pos_nor,
+ GPUVertBuf *lnor)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LNOR, "#define SUBDIV_POLYGON_OFFSET\n");
+ GPU_shader_bind(shader);
+
+ /* Inputs */
+ GPU_vertbuf_bind_as_ssbo(pos_nor, 1);
+ GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 2);
+ /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+ GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+
+ /* Outputs */
+ GPU_vertbuf_bind_as_ssbo(lnor, 3);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache,
+ GPUVertBuf *coarse_data,
+ GPUVertBuf *subdiv_data)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_AREA,
+ "#define SUBDIV_POLYGON_OFFSET\n");
+ GPU_shader_bind(shader);
+
+ /* Inputs */
+ GPU_vertbuf_bind_as_ssbo(coarse_data, 1);
+ /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */
+ GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0);
+
+ /* Outputs */
+ GPU_vertbuf_bind_as_ssbo(subdiv_data, 2);
+
+ drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache,
+ GPUVertBuf *pos_nor,
+ GPUVertBuf *uvs,
+ int uvs_offset,
+ GPUVertBuf *stretch_angles)
+{
+ GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_ANGLE, nullptr);
+ GPU_shader_bind(shader);
+
+ /* Inputs */
+ GPU_vertbuf_bind_as_ssbo(pos_nor, 0);
+ GPU_vertbuf_bind_as_ssbo(uvs, 1);
+
+ /* Outputs */
+ GPU_vertbuf_bind_as_ssbo(stretch_angles, 2);
+
+ drw_subdiv_compute_dispatch(cache, shader, uvs_offset, 0, cache->num_subdiv_quads);
+
+ /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */
+ GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
+
+ /* Cleanup. */
+ GPU_shader_unbind();
+}
+
+/* -------------------------------------------------------------------- */
+
+void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache,
+ MeshRenderData *mr,
+ const ToolSettings *toolsettings)
+{
+ Mesh *mesh = cache->mesh;
+
+ /* Setup required data for loose geometry. */
+ mr->me = mesh;
+ mr->medge = mesh->medge;
+ mr->mvert = mesh->mvert;
+ mr->mpoly = mesh->mpoly;
+ mr->mloop = mesh->mloop;
+ mr->vert_len = mesh->totvert;
+ mr->edge_len = mesh->totedge;
+ mr->poly_len = mesh->totpoly;
+ mr->loop_len = mesh->totloop;
+ mr->extract_type = MR_EXTRACT_MESH;
+
+ /* MeshRenderData is only used for generating edit mode data here. */
+ if (!cache->bm) {
+ return;
+ }
+
+ BMesh *bm = cache->bm;
+ BM_mesh_elem_table_ensure(bm, BM_EDGE | BM_FACE | BM_VERT);
+
+ mr->bm = bm;
+ mr->toolsettings = toolsettings;
+ mr->eed_act = BM_mesh_active_edge_get(bm);
+ mr->efa_act = BM_mesh_active_face_get(bm, false, true);
+ mr->eve_act = BM_mesh_active_vert_get(bm);
+ mr->crease_ofs = CustomData_get_offset(&bm->edata, CD_CREASE);
+ mr->bweight_ofs = CustomData_get_offset(&bm->edata, CD_BWEIGHT);
+#ifdef WITH_FREESTYLE
+ mr->freestyle_edge_ofs = CustomData_get_offset(&bm->edata, CD_FREESTYLE_EDGE);
+ mr->freestyle_face_ofs = CustomData_get_offset(&bm->pdata, CD_FREESTYLE_FACE);
+#endif
+ mr->v_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX));
+ mr->e_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX));
+ mr->p_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX));
+}
+
+/**
+ * For material assignments we want indices for triangles that share a common material to be laid
+ * out contiguously in memory. To achieve this, we sort the indices based on which material the
+ * coarse polygon was assigned. The sort is performed by offsetting the loop indices so that they
+ * are directly assigned to the right sorted indices.
+ *
+ * \code{.unparsed}
+ * Here is a visual representation, considering four quads:
+ * +---------+---------+---------+---------+
+ * | 3 2 | 7 6 | 11 10 | 15 14 |
+ * | | | | |
+ * | 0 1 | 4 5 | 8 9 | 12 13 |
+ * +---------+---------+---------+---------+
+ *
+ * If the first and third quads have the same material, we should have:
+ * +---------+---------+---------+---------+
+ * | 3 2 | 11 10 | 7 6 | 15 14 |
+ * | | | | |
+ * | 0 1 | 8 9 | 4 5 | 12 13 |
+ * +---------+---------+---------+---------+
+ *
+ * So the offsets would be:
+ * +---------+---------+---------+---------+
+ * | 0 0 | 4 4 | -4 -4 | 0 0 |
+ * | | | | |
+ * | 0 0 | 4 4 | -4 -4 | 0 0 |
+ * +---------+---------+---------+---------+
+ * \endcode
+ *
+ * The offsets are computed not based on the loop indices, but on the number of subdivided
+ * polygons for each coarse polygon. We then only store a single offset for each coarse polygon:
+ * since all sub-faces are contiguous, they all share the same offset.
+ */
+static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache,
+ Mesh *mesh_eval,
+ uint mat_len)
+{
+ draw_subdiv_cache_free_material_data(cache);
+
+ const int number_of_quads = cache->num_subdiv_loops / 4;
+
+ if (mat_len == 1) {
+    cache->mat_start = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_start"));
+ cache->mat_end = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end"));
+ cache->mat_start[0] = 0;
+ cache->mat_end[0] = number_of_quads;
+ return;
+ }
+
+ /* Count number of subdivided polygons for each material. */
+ int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start"));
+ int *subdiv_polygon_offset = cache->subdiv_polygon_offset;
+
+  /* TODO: parallel_reduce? */
+ for (int i = 0; i < mesh_eval->totpoly; i++) {
+ const MPoly *mpoly = &mesh_eval->mpoly[i];
+ const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads :
+ subdiv_polygon_offset[i + 1];
+ const int quad_count = next_offset - subdiv_polygon_offset[i];
+ const int mat_index = mpoly->mat_nr;
+ mat_start[mat_index] += quad_count;
+ }
+
+ /* Accumulate offsets. */
+ int ofs = mat_start[0];
+ mat_start[0] = 0;
+ for (uint i = 1; i < mat_len; i++) {
+ int tmp = mat_start[i];
+ mat_start[i] = ofs;
+ ofs += tmp;
+ }
+
+ /* Compute per polygon offsets. */
+ int *mat_end = static_cast<int *>(MEM_dupallocN(mat_start));
+ int *per_polygon_mat_offset = static_cast<int *>(
+ MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset"));
+
+ for (int i = 0; i < mesh_eval->totpoly; i++) {
+ const MPoly *mpoly = &mesh_eval->mpoly[i];
+ const int mat_index = mpoly->mat_nr;
+ const int single_material_index = subdiv_polygon_offset[i];
+ const int material_offset = mat_end[mat_index];
+ const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads :
+ subdiv_polygon_offset[i + 1];
+ const int quad_count = next_offset - subdiv_polygon_offset[i];
+ mat_end[mat_index] += quad_count;
+
+ per_polygon_mat_offset[i] = material_offset - single_material_index;
+ }
+
+ cache->polygon_mat_offset = draw_subdiv_build_origindex_buffer(per_polygon_mat_offset,
+ mesh_eval->totpoly);
+ cache->mat_start = mat_start;
+ cache->mat_end = mat_end;
+
+ MEM_freeN(per_polygon_mat_offset);
+}
+
+static bool draw_subdiv_create_requested_buffers(const Scene *scene,
+ Object *ob,
+ Mesh *mesh,
+ struct MeshBatchCache *batch_cache,
+ MeshBufferCache *mbc,
+ const ToolSettings *toolsettings,
+ OpenSubdiv_EvaluatorCache *evaluator_cache)
+{
+ SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob);
+ BLI_assert(smd);
+
+ const bool is_final_render = DRW_state_is_scene_render();
+
+ SubdivSettings settings;
+ BKE_subsurf_modifier_subdiv_settings_init(&settings, smd, is_final_render);
+
+ if (settings.level == 0) {
+ return false;
+ }
+
+ Mesh *mesh_eval = mesh;
+ BMesh *bm = nullptr;
+ if (mesh->edit_mesh) {
+ mesh_eval = mesh->edit_mesh->mesh_eval_final;
+ bm = mesh->edit_mesh->bm;
+ }
+
+ BKE_subsurf_modifier_ensure_runtime(smd);
+
+ Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &settings, mesh_eval, true);
+ if (!subdiv) {
+ return false;
+ }
+
+ if (!BKE_subdiv_eval_begin_from_mesh(
+ subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) {
+ return false;
+ }
+
+ DRWSubdivCache *draw_cache = mesh_batch_cache_ensure_subdiv_cache(batch_cache);
+ if (!draw_subdiv_build_cache(draw_cache, subdiv, mesh_eval, scene, smd, is_final_render)) {
+ return false;
+ }
+
+ const bool optimal_display = (smd->flags & eSubsurfModifierFlag_ControlEdges);
+
+ draw_cache->bm = bm;
+ draw_cache->mesh = mesh_eval;
+ draw_cache->subdiv = subdiv;
+ draw_cache->optimal_display = optimal_display;
+ draw_cache->num_subdiv_triangles = tris_count_from_number_of_loops(draw_cache->num_subdiv_loops);
+ /* We can only evaluate limit normals if the patches are adaptive. */
+ draw_cache->do_limit_normals = settings.is_adaptive;
+
+ if (DRW_ibo_requested(mbc->buff.ibo.tris)) {
+ draw_subdiv_cache_ensure_mat_offsets(draw_cache, mesh_eval, batch_cache->mat_len);
+ }
+
+ draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval);
+
+ mesh_buffer_cache_create_requested_subdiv(batch_cache, mbc, draw_cache, toolsettings);
+
+ return true;
+}
+
+static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr;
+
+void DRW_create_subdivision(const Scene *scene,
+ Object *ob,
+ Mesh *mesh,
+ struct MeshBatchCache *batch_cache,
+ MeshBufferCache *mbc,
+ const ToolSettings *toolsettings)
+{
+ if (g_evaluator_cache == nullptr) {
+ g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE);
+ }
+
+ /* Define TIME_SUBDIV to log subdivision update timings. */
+#undef TIME_SUBDIV
+
+#ifdef TIME_SUBDIV
+ const double begin_time = PIL_check_seconds_timer();
+#endif
+
+ if (!draw_subdiv_create_requested_buffers(
+ scene, ob, mesh, batch_cache, mbc, toolsettings, g_evaluator_cache)) {
+ return;
+ }
+
+#ifdef TIME_SUBDIV
+ const double end_time = PIL_check_seconds_timer();
+ fprintf(stderr, "Time to update subdivision: %f\n", end_time - begin_time);
+ fprintf(stderr, "Maximum FPS: %f\n", 1.0 / (end_time - begin_time));
+#endif
+}
+
+void DRW_subdiv_free()
+{
+ for (int i = 0; i < NUM_SHADERS; ++i) {
+ GPU_shader_free(g_subdiv_shaders[i]);
+ }
+
+ DRW_cache_free_old_subdiv();
+
+ if (g_evaluator_cache) {
+ openSubdiv_deleteEvaluatorCache(g_evaluator_cache);
+ g_evaluator_cache = nullptr;
+ }
+}
+
+static LinkNode *gpu_subdiv_free_queue = nullptr;
+static ThreadMutex gpu_subdiv_queue_mutex = BLI_MUTEX_INITIALIZER;
+
+void DRW_subdiv_cache_free(Subdiv *subdiv)
+{
+ BLI_mutex_lock(&gpu_subdiv_queue_mutex);
+ BLI_linklist_prepend(&gpu_subdiv_free_queue, subdiv);
+ BLI_mutex_unlock(&gpu_subdiv_queue_mutex);
+}
+
+void DRW_cache_free_old_subdiv()
+{
+ if (gpu_subdiv_free_queue == nullptr) {
+ return;
+ }
+
+ BLI_mutex_lock(&gpu_subdiv_queue_mutex);
+
+ while (gpu_subdiv_free_queue != nullptr) {
+ Subdiv *subdiv = static_cast<Subdiv *>(BLI_linklist_pop(&gpu_subdiv_free_queue));
+ /* Set the type to CPU so that we do actually free the cache. */
+ subdiv->evaluator->type = OPENSUBDIV_EVALUATOR_CPU;
+ BKE_subdiv_free(subdiv);
+ }
+
+ BLI_mutex_unlock(&gpu_subdiv_queue_mutex);
+}
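The free-queue above lets arbitrary threads schedule GPU-backed `Subdiv` caches for destruction, while the actual free happens later on a thread that owns the GL context. The same deferred-free pattern can be sketched generically (illustrative types, not Blender API):

```cpp
#include <cassert>
#include <cstddef>
#include <mutex>
#include <vector>

/* Sketch of the deferred-free pattern: any thread may enqueue a resource for
 * destruction; the queue is drained later on the thread that owns the
 * graphics context. */
template<typename T> class DeferredFreeQueue {
 public:
  void enqueue(T *item)
  {
    std::lock_guard<std::mutex> lock(mutex_);
    queue_.push_back(item);
  }

  /* Must be called from the owning thread (e.g. the draw thread).
   * Returns the number of items freed. */
  std::size_t drain()
  {
    std::vector<T *> local;
    {
      std::lock_guard<std::mutex> lock(mutex_);
      local.swap(queue_); /* Keep the critical section short; free outside it. */
    }
    for (T *item : local) {
      delete item;
    }
    return local.size();
  }

 private:
  std::mutex mutex_;
  std::vector<T *> queue_;
};
```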
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 930fb6eabef..0bf6468f7cc 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -52,6 +52,7 @@
#include "BKE_pointcache.h"
#include "BKE_pointcloud.h"
#include "BKE_screen.h"
+#include "BKE_subdiv_modifier.h"
#include "BKE_volume.h"
#include "DNA_camera_types.h"
@@ -90,6 +91,7 @@
#include "draw_manager_testing.h"
#include "draw_manager_text.h"
#include "draw_shader.h"
+#include "draw_subdivision.h"
#include "draw_texture_pool.h"
/* only for callbacks */
@@ -2975,6 +2977,8 @@ void DRW_engines_register(void)
BKE_volume_batch_cache_dirty_tag_cb = DRW_volume_batch_cache_dirty_tag;
BKE_volume_batch_cache_free_cb = DRW_volume_batch_cache_free;
+
+ BKE_subsurf_modifier_free_gpu_cache_cb = DRW_subdiv_cache_free;
}
}
diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h
new file mode 100644
index 00000000000..f60ec7afc77
--- /dev/null
+++ b/source/blender/draw/intern/draw_subdivision.h
@@ -0,0 +1,231 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2021, Blender Foundation.
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "BLI_sys_types.h"
+
+struct BMesh;
+struct GPUIndexBuf;
+struct GPUUniformBuf;
+struct GPUVertBuf;
+struct Mesh;
+struct MeshBatchCache;
+struct MeshBufferCache;
+struct MeshRenderData;
+struct Object;
+struct Scene;
+struct Subdiv;
+struct ToolSettings;
+
+/* -------------------------------------------------------------------- */
+/** \name DRWPatchMap
+ *
+ * This is a GPU version of the OpenSubDiv PatchMap. The quad tree and the patch handles are copied
+ * to GPU buffers in order to look up the right patch for a given set of patch coordinates.
+ * \{ */
+
+typedef struct DRWPatchMap {
+ struct GPUVertBuf *patch_map_handles;
+ struct GPUVertBuf *patch_map_quadtree;
+ int min_patch_face;
+ int max_patch_face;
+ int max_depth;
+ int patches_are_triangular;
+} DRWPatchMap;
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name DRWSubdivCache
+ *
+ * This holds the various buffers used to evaluate and render subdivision through OpenGL.
+ * \{ */
+
+typedef struct DRWSubdivCache {
+ struct Mesh *mesh;
+ struct BMesh *bm;
+ struct Subdiv *subdiv;
+ bool optimal_display;
+ bool do_limit_normals;
+
+ /* Coordinates used to evaluate patches for UVs, positions, and normals. */
+ struct GPUVertBuf *patch_coords;
+ /* Coordinates used to evaluate patches for the face centers (or face dots) in edit-mode. */
+ struct GPUVertBuf *fdots_patch_coords;
+
+ /* Resolution used to generate the patch coordinates. */
+ int resolution;
+
+ /* Number of subdivided loops, also the number of patch coordinates since we have one coordinate
+ * per quad corner/vertex. */
+ uint num_subdiv_loops;
+ uint num_subdiv_edges;
+ uint num_subdiv_triangles;
+ uint num_subdiv_verts;
+ uint num_subdiv_quads;
+
+ /* Number of polygons in the coarse mesh, notably used to compute a coarse polygon index given a
+ * subdivision loop index. */
+ int num_coarse_poly;
+
+ /* Maps subdivision loop to subdivided vertex index. */
+ int *subdiv_loop_subdiv_vert_index;
+ /* Maps subdivision loop to original coarse poly index. */
+ int *subdiv_loop_poly_index;
+
+ /* Indices of faces adjacent to the vertices, ordered by vertex index, with no particular
+ * winding. */
+ struct GPUVertBuf *subdiv_vertex_face_adjacency;
+ /* The difference between value (i + 1) and (i) gives the number of faces adjacent to vertex (i).
+ */
+ struct GPUVertBuf *subdiv_vertex_face_adjacency_offsets;
+
+ /* Maps subdivision loop to original coarse vertex index, only really useful for edit mode. */
+ struct GPUVertBuf *verts_orig_index;
+ /* Maps subdivision loop to original coarse edge index, only really useful for edit mode. */
+ struct GPUVertBuf *edges_orig_index;
+
+ /* Owned by #Subdiv. Indexed by coarse polygon index; the difference between values (i + 1) and
+ * (i) gives the number of ptex faces for coarse polygon (i). */
+ int *face_ptex_offset;
+ /* Vertex buffer for face_ptex_offset. */
+ struct GPUVertBuf *face_ptex_offset_buffer;
+
+ int *subdiv_polygon_offset;
+ struct GPUVertBuf *subdiv_polygon_offset_buffer;
+
+ /* Contains the start loop index and the smooth flag for each coarse polygon. */
+ struct GPUVertBuf *extra_coarse_face_data;
+
+ /* Computed for ibo.points, one value per subdivided vertex, mapping coarse vertices ->
+ * subdivided loop */
+ int *point_indices;
+
+ /* Material offsets. */
+ int *mat_start;
+ int *mat_end;
+ struct GPUVertBuf *polygon_mat_offset;
+
+ DRWPatchMap gpu_patch_map;
+
+ /* UBO to store settings for the various compute shaders. */
+ struct GPUUniformBuf *ubo;
+} DRWSubdivCache;
+
+/* Only frees the data of the cache; the caller is responsible for freeing the cache itself if
+ * necessary. */
+void draw_subdiv_cache_free(DRWSubdivCache *cache);
+
+/** \} */
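Several buffers in `DRWSubdivCache` (e.g. `subdiv_vertex_face_adjacency_offsets`, `face_ptex_offset`) use the same offset-array convention: the difference between values (i + 1) and (i) gives the element count for item (i). A small sketch of a lookup into such a CSR-style layout (the data below is illustrative, not taken from Blender):

```cpp
#include <cassert>
#include <vector>

/* Sketch of the offset/list (CSR-style) layout used for vertex->face
 * adjacency: the faces adjacent to vertex `v` are
 * lists[offsets[v] .. offsets[v + 1] - 1]. */
std::vector<int> faces_adjacent_to_vertex(const std::vector<int> &offsets,
                                          const std::vector<int> &lists,
                                          int v)
{
  std::vector<int> result;
  for (int i = offsets[v]; i < offsets[v + 1]; i++) {
    result.push_back(lists[i]);
  }
  return result;
}
```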
+
+void DRW_create_subdivision(const struct Scene *scene,
+ struct Object *ob,
+ struct Mesh *mesh,
+ struct MeshBatchCache *batch_cache,
+ struct MeshBufferCache *mbc,
+ const struct ToolSettings *toolsettings);
+
+void DRW_subdiv_cache_free(struct Subdiv *subdiv);
+
+void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache,
+ struct MeshRenderData *mr,
+ const struct ToolSettings *toolsettings);
+
+void draw_subdiv_init_origindex_buffer(struct GPUVertBuf *buffer,
+ int *vert_origindex,
+ uint num_loops,
+ uint loose_len);
+
+struct GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops);
+
+/* Compute shader functions. */
+
+void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache,
+ struct GPUVertBuf *mask_vbo,
+ struct GPUVertBuf *face_set_vbo,
+ struct GPUVertBuf *sculpt_data);
+
+void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache,
+ struct GPUVertBuf *pos_nor,
+ struct GPUVertBuf *face_adjacency_offsets,
+ struct GPUVertBuf *face_adjacency_lists,
+ struct GPUVertBuf *vertex_normals);
+
+void draw_subdiv_finalize_normals(const DRWSubdivCache *cache,
+ struct GPUVertBuf *vertex_normals,
+ struct GPUVertBuf *subdiv_loop_subdiv_vert_index,
+ struct GPUVertBuf *pos_nor);
+
+void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache,
+ struct GPUVertBuf *pos_nor,
+ const bool do_limit_normals);
+
+void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache,
+ struct GPUVertBuf *src_data,
+ struct GPUVertBuf *dst_buffer,
+ int dimensions,
+ int dst_offset);
+
+void draw_subdiv_extract_uvs(const DRWSubdivCache *cache,
+ struct GPUVertBuf *uvs,
+ const int face_varying_channel,
+ const int dst_offset);
+
+void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache,
+ struct GPUVertBuf *pos_nor,
+ struct GPUVertBuf *edge_idx,
+ struct GPUVertBuf *edge_fac);
+
+void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache,
+ struct GPUIndexBuf *subdiv_tris,
+ const int material_count);
+
+void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache,
+ struct GPUIndexBuf *lines_indices);
+
+void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache,
+ struct GPUIndexBuf *lines_indices,
+ uint num_loose_edges);
+
+void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache,
+ struct GPUVertBuf *fdots_pos,
+ struct GPUVertBuf *fdots_nor,
+ struct GPUIndexBuf *fdots_indices);
+
+void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache,
+ struct GPUVertBuf *pos_nor,
+ struct GPUVertBuf *lnor);
+
+void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache,
+ struct GPUVertBuf *coarse_data,
+ struct GPUVertBuf *subdiv_data);
+
+void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache,
+ struct GPUVertBuf *pos_nor,
+ struct GPUVertBuf *uvs,
+ int uvs_offset,
+ struct GPUVertBuf *stretch_angles);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.h b/source/blender/draw/intern/mesh_extractors/extract_mesh.h
index 7d21804c08f..35cc2cf986e 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh.h
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.h
@@ -39,6 +39,8 @@
extern "C" {
#endif
+struct DRWSubdivCache;
+
#define MIN_RANGE_LEN 1024
/* ---------------------------------------------------------------------- */
@@ -203,6 +205,11 @@ typedef void(ExtractLVertMeshFn)(const MeshRenderData *mr,
const MVert *mv,
const int lvert_index,
void *data);
+typedef void(ExtractLooseGeomSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *data);
typedef void(ExtractInitFn)(const MeshRenderData *mr,
struct MeshBatchCache *cache,
void *buffer,
@@ -213,6 +220,18 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr,
void *data);
typedef void(ExtractTaskReduceFn)(void *userdata, void *task_userdata);
+typedef void(ExtractInitSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buf,
+ void *data);
+typedef void(ExtractIterSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *data);
+typedef void(ExtractFinishSubdivFn)(const struct DRWSubdivCache *subdiv_cache,
+ void *buf,
+ void *data);
+
typedef struct MeshExtract {
/** Executed on main thread and return user data for iteration functions. */
ExtractInitFn *init;
@@ -225,9 +244,14 @@ typedef struct MeshExtract {
ExtractLEdgeMeshFn *iter_ledge_mesh;
ExtractLVertBMeshFn *iter_lvert_bm;
ExtractLVertMeshFn *iter_lvert_mesh;
+ ExtractLooseGeomSubdivFn *iter_loose_geom_subdiv;
/** Executed on one worker thread after all elements iterations. */
ExtractTaskReduceFn *task_reduce;
ExtractFinishFn *finish;
+ /** Executed on main thread for subdivision evaluation. */
+ ExtractInitSubdivFn *init_subdiv;
+ ExtractIterSubdivFn *iter_subdiv;
+ ExtractFinishSubdivFn *finish_subdiv;
/** Used to request common data. */
eMRDataType data_type;
size_t data_size;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
index 4cc9a875f79..6a1691e8634 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc
@@ -27,6 +27,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
/** \name Extract Edit UV Triangles Indices
@@ -94,6 +96,57 @@ static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr),
GPU_indexbuf_build_in_place(&data->elb, ibo);
}
+static void extract_edituv_tris_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *UNUSED(buf),
+ void *tls_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+ GPU_indexbuf_init(&data->elb,
+ GPU_PRIM_TRIS,
+ subdiv_cache->num_subdiv_triangles,
+ subdiv_cache->num_subdiv_loops);
+ data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_tris_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) {
+ const uint loop_idx = i * 4;
+ const int poly_origindex = subdiv_loop_poly_index[loop_idx];
+ BMFace *efa = bm_original_face_get(mr, poly_origindex);
+
+ edituv_tri_add(data,
+ BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0,
+ BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0,
+ loop_idx,
+ loop_idx + 1,
+ loop_idx + 2);
+
+ edituv_tri_add(data,
+ BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0,
+ BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0,
+ loop_idx,
+ loop_idx + 2,
+ loop_idx + 3);
+ }
+}
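The iteration above emits two triangles per subdivided quad using the fixed (l0, l1, l2) / (l0, l2, l3) fan pattern over four consecutive loop indices. A standalone sketch of that index generation (hypothetical helper, not Blender API):

```cpp
#include <cassert>
#include <vector>

/* Sketch: triangulate a list of quads (4 consecutive loop indices per quad)
 * into the (l0, l1, l2) / (l0, l2, l3) pattern used by the extractor. */
std::vector<unsigned> quads_to_tris(unsigned num_quads)
{
  std::vector<unsigned> tris;
  tris.reserve(num_quads * 6); /* Two triangles (6 indices) per quad. */
  for (unsigned i = 0; i < num_quads; i++) {
    const unsigned l0 = i * 4;
    tris.insert(tris.end(), {l0, l0 + 1, l0 + 2});
    tris.insert(tris.end(), {l0, l0 + 2, l0 + 3});
  }
  return tris;
}
```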
+
+static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+ void *buf,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
+ GPU_indexbuf_build_in_place(&data->elb, ibo);
+}
+
constexpr MeshExtract create_extractor_edituv_tris()
{
MeshExtract extractor = {nullptr};
@@ -101,6 +154,9 @@ constexpr MeshExtract create_extractor_edituv_tris()
extractor.iter_looptri_bm = extract_edituv_tris_iter_looptri_bm;
extractor.iter_looptri_mesh = extract_edituv_tris_iter_looptri_mesh;
extractor.finish = extract_edituv_tris_finish;
+ extractor.init_subdiv = extract_edituv_tris_init_subdiv;
+ extractor.iter_subdiv = extract_edituv_tris_iter_subdiv;
+ extractor.finish_subdiv = extract_edituv_tris_finish_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
extractor.use_threading = false;
@@ -184,6 +240,56 @@ static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr),
GPU_indexbuf_build_in_place(&data->elb, ibo);
}
+static void extract_edituv_lines_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *UNUSED(buf),
+ void *tls_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+ GPU_indexbuf_init(
+ &data->elb, GPU_PRIM_LINES, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops);
+ data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_lines_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+ int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) {
+
+ uint start_loop_idx = i * 4;
+ uint end_loop_idx = (i + 1) * 4;
+
+ const int poly_origindex = subdiv_loop_poly_index[start_loop_idx];
+ BMFace *efa = bm_original_face_get(mr, poly_origindex);
+
+ for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) {
+ const int edge_origindex = subdiv_loop_edge_index[loop_idx];
+ const bool real_edge = (edge_origindex != -1 &&
+ mr->e_origindex[edge_origindex] != ORIGINDEX_NONE);
+ edituv_edge_add(data,
+ BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) != 0 || !real_edge,
+ BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) != 0,
+ loop_idx,
+ (loop_idx + 1 == end_loop_idx) ? start_loop_idx : (loop_idx + 1));
+ }
+ }
+}
+
+static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+ void *buf,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
+ GPU_indexbuf_build_in_place(&data->elb, ibo);
+}
+
constexpr MeshExtract create_extractor_edituv_lines()
{
MeshExtract extractor = {nullptr};
@@ -191,6 +297,9 @@ constexpr MeshExtract create_extractor_edituv_lines()
extractor.iter_poly_bm = extract_edituv_lines_iter_poly_bm;
extractor.iter_poly_mesh = extract_edituv_lines_iter_poly_mesh;
extractor.finish = extract_edituv_lines_finish;
+ extractor.init_subdiv = extract_edituv_lines_init_subdiv;
+ extractor.iter_subdiv = extract_edituv_lines_iter_subdiv;
+ extractor.finish_subdiv = extract_edituv_lines_finish_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
extractor.use_threading = false;
@@ -268,6 +377,50 @@ static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr),
GPU_indexbuf_build_in_place(&data->elb, ibo);
}
+static void extract_edituv_points_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *UNUSED(buf),
+ void *tls_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data);
+ GPU_indexbuf_init(
+ &data->elb, GPU_PRIM_POINTS, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops);
+ data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0;
+}
+
+static void extract_edituv_points_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) {
+ const int vert_origindex = subdiv_loop_vert_index[i];
+ const int poly_origindex = subdiv_loop_poly_index[i];
+ BMFace *efa = bm_original_face_get(mr, poly_origindex);
+
+ const bool real_vert = (mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) &&
+ vert_origindex != -1 &&
+ mr->v_origindex[vert_origindex] != ORIGINDEX_NONE);
+ edituv_point_add(data,
+ (BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) || !real_vert,
+ BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0,
+ i);
+ }
+}
+
+static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache),
+ void *buf,
+ void *_data)
+{
+ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data);
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
+ GPU_indexbuf_build_in_place(&data->elb, ibo);
+}
+
constexpr MeshExtract create_extractor_edituv_points()
{
MeshExtract extractor = {nullptr};
@@ -275,6 +428,9 @@ constexpr MeshExtract create_extractor_edituv_points()
extractor.iter_poly_bm = extract_edituv_points_iter_poly_bm;
extractor.iter_poly_mesh = extract_edituv_points_iter_poly_mesh;
extractor.finish = extract_edituv_points_finish;
+ extractor.init_subdiv = extract_edituv_points_init_subdiv;
+ extractor.iter_subdiv = extract_edituv_points_iter_subdiv;
+ extractor.finish_subdiv = extract_edituv_points_finish_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_EditUvElem_Data);
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
index 54f5611106f..3d9729dea56 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
@@ -25,6 +25,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -155,6 +157,33 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr),
GPU_indexbuf_build_in_place(elb, ibo);
}
+static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer);
+ GPU_indexbuf_init_build_on_device(ibo,
+ subdiv_cache->num_subdiv_loops * 2 + mr->edge_loose_len * 2);
+
+ draw_subdiv_build_lines_buffer(subdiv_cache, ibo);
+}
+
+static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *UNUSED(data))
+{
+ if (loose_geom->edge_len == 0) {
+ return;
+ }
+
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer);
+ draw_subdiv_build_lines_loose_buffer(subdiv_cache, ibo, static_cast<uint>(loose_geom->edge_len));
+}
+
constexpr MeshExtract create_extractor_lines()
{
MeshExtract extractor = {nullptr};
@@ -163,6 +192,8 @@ constexpr MeshExtract create_extractor_lines()
extractor.iter_poly_mesh = extract_lines_iter_poly_mesh;
extractor.iter_ledge_bm = extract_lines_iter_ledge_bm;
extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh;
+ extractor.init_subdiv = extract_lines_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_lines_loose_geom_subdiv;
extractor.task_reduce = extract_lines_task_reduce;
extractor.finish = extract_lines_finish;
extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
index e7dabfa9ee2..6855feb51ed 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc
@@ -26,6 +26,7 @@
#include "MEM_guardedalloc.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -44,6 +45,18 @@ struct MeshExtract_LineAdjacency_Data {
uint *vert_to_loop;
};
+static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data,
+ uint vert_len,
+ uint loop_len,
+ uint tess_edge_len)
+{
+ data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * vert_len, __func__));
+
+ GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, loop_len);
+ data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len);
+ data->is_manifold = true;
+}
+
static void extract_lines_adjacency_init(const MeshRenderData *mr,
struct MeshBatchCache *UNUSED(cache),
void *UNUSED(buf),
@@ -55,11 +68,7 @@ static void extract_lines_adjacency_init(const MeshRenderData *mr,
uint tess_edge_len = mr->loop_len + mr->tri_len - mr->poly_len;
MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(tls_data);
- data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * mr->vert_len, __func__));
-
- GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, mr->loop_len);
- data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len);
- data->is_manifold = true;
+ line_adjacency_data_init(data, mr->vert_len, mr->loop_len, tess_edge_len);
}
BLI_INLINE void lines_adjacency_triangle(
@@ -171,6 +180,56 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr),
MEM_freeN(data->vert_to_loop);
}
+static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *UNUSED(cache),
+ void *UNUSED(buf),
+ void *_data)
+{
+ MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+
+ /* For each polygon there are (loops + triangles - 1) edges. Since we only have quads, and a
+ * quad is split into 2 triangles, we have (loops + 2 - 1) = (loops + 1) edges for each quad, or
+ * in total: (number_of_loops + number_of_quads). */
+ const uint tess_len = subdiv_cache->num_subdiv_loops + subdiv_cache->num_subdiv_quads;
+ line_adjacency_data_init(
+ data, subdiv_cache->num_subdiv_verts, subdiv_cache->num_subdiv_loops, tess_len);
+}
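The edge-count comment above can be checked by brute force: enumerate the unique edges of each quad's two triangles (subdivided quads do not share loop indices, so quads are independent). A sketch, with an illustrative helper:

```cpp
#include <cassert>
#include <set>
#include <utility>

/* Sketch: count the unique edges of `num_quads` quads, each split into the
 * triangles (l0, l1, l2) and (l0, l2, l3). Each quad yields its 4 boundary
 * (loop) edges plus 1 shared diagonal, i.e. loops + quads edges in total. */
unsigned tess_edge_count(unsigned num_quads)
{
  std::set<std::pair<unsigned, unsigned>> edges;
  for (unsigned q = 0; q < num_quads; q++) {
    const unsigned l0 = q * 4;
    const unsigned tris[2][3] = {{l0, l0 + 1, l0 + 2}, {l0, l0 + 2, l0 + 3}};
    for (const auto &tri : tris) {
      for (int e = 0; e < 3; e++) {
        const unsigned a = tri[e];
        const unsigned b = tri[(e + 1) % 3];
        edges.insert({a < b ? a : b, a < b ? b : a}); /* Canonical order. */
      }
    }
  }
  return (unsigned)edges.size();
}
```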
+
+static void extract_lines_adjacency_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ void *_data)
+{
+ MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) {
+ const uint loop_index = i * 4;
+ const uint l0 = loop_index + 0;
+ const uint l1 = loop_index + 1;
+ const uint l2 = loop_index + 2;
+ const uint l3 = loop_index + 3;
+
+ const uint v0 = subdiv_cache->subdiv_loop_subdiv_vert_index[l0];
+ const uint v1 = subdiv_cache->subdiv_loop_subdiv_vert_index[l1];
+ const uint v2 = subdiv_cache->subdiv_loop_subdiv_vert_index[l2];
+ const uint v3 = subdiv_cache->subdiv_loop_subdiv_vert_index[l3];
+
+ lines_adjacency_triangle(v0, v1, v2, l0, l1, l2, data);
+ lines_adjacency_triangle(v0, v2, v3, l0, l2, l3, data);
+ }
+}
+
+static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache),
+ void *buf,
+ void *_data)
+{
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
+ MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data);
+ GPU_indexbuf_build_in_place(&data->elb, ibo);
+ BLI_edgehash_free(data->eh, nullptr);
+ MEM_freeN(data->vert_to_loop);
+}
+
#undef NO_EDGE
constexpr MeshExtract create_extractor_lines_adjacency()
@@ -180,6 +239,9 @@ constexpr MeshExtract create_extractor_lines_adjacency()
extractor.iter_looptri_bm = extract_lines_adjacency_iter_looptri_bm;
extractor.iter_looptri_mesh = extract_lines_adjacency_iter_looptri_mesh;
extractor.finish = extract_lines_adjacency_finish;
+ extractor.init_subdiv = extract_lines_adjacency_init_subdiv;
+ extractor.iter_subdiv = extract_lines_adjacency_iter_subdiv;
+ extractor.finish_subdiv = extract_lines_adjacency_finish_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_LineAdjacency_Data);
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
index 01e14a004ed..19167772a42 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
@@ -25,6 +25,7 @@
#include "MEM_guardedalloc.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -155,6 +156,74 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr),
GPU_indexbuf_build_in_place(elb, ibo);
}
+static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *UNUSED(cache),
+ void *UNUSED(buffer),
+ void *data)
+{
+ GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data);
+ /* Copy the points as the data upload will free them. */
+ elb->data = (uint *)MEM_dupallocN(subdiv_cache->point_indices);
+ elb->index_len = subdiv_cache->num_subdiv_verts;
+ elb->index_min = 0;
+ elb->index_max = subdiv_cache->num_subdiv_loops - 1;
+ elb->prim_type = GPU_PRIM_POINTS;
+}
+
+static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *UNUSED(buffer),
+ void *data)
+{
+ const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len;
+ if (loop_loose_len == 0) {
+ return;
+ }
+
+ GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data);
+
+ elb->data = static_cast<uint32_t *>(
+ MEM_reallocN(elb->data, sizeof(uint) * (subdiv_cache->num_subdiv_loops + loop_loose_len)));
+
+ const Mesh *coarse_mesh = subdiv_cache->mesh;
+ const MEdge *coarse_edges = coarse_mesh->medge;
+
+ uint offset = subdiv_cache->num_subdiv_loops;
+
+ for (int i = 0; i < loose_geom->edge_len; i++) {
+ const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]];
+ if (elb->data[loose_edge->v1] == -1u) {
+ elb->data[loose_edge->v1] = offset;
+ }
+ if (elb->data[loose_edge->v2] == -1u) {
+ elb->data[loose_edge->v2] = offset + 1;
+ }
+ elb->index_max += 2;
+ elb->index_len += 2;
+ offset += 2;
+ }
+
+ for (int i = 0; i < loose_geom->vert_len; i++) {
+ if (elb->data[loose_geom->verts[i]] == -1u) {
+ elb->data[loose_geom->verts[i]] = offset;
+ }
+ elb->index_max += 1;
+ elb->index_len += 1;
+ offset += 1;
+ }
+}
+
+static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache),
+ void *buf,
+ void *_userdata)
+{
+ GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata);
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
+ GPU_indexbuf_build_in_place(elb, ibo);
+}
+
constexpr MeshExtract create_extractor_points()
{
MeshExtract extractor = {nullptr};
@@ -167,6 +236,9 @@ constexpr MeshExtract create_extractor_points()
extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh;
extractor.task_reduce = extract_points_task_reduce;
extractor.finish = extract_points_finish;
+ extractor.init_subdiv = extract_points_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_points_loose_geom_subdiv;
+ extractor.finish_subdiv = extract_points_finish_subdiv;
extractor.use_threading = true;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(GPUIndexBufBuilder);
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
index 54e733d3d86..b1ace8bc6c9 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
@@ -25,6 +25,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from)
@@ -123,10 +125,37 @@ static void extract_tris_finish(const MeshRenderData *mr,
}
}
+static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer);
+ /* Initialize the index buffer; it was already allocated and will be filled on the device. */
+ GPU_indexbuf_init_build_on_device(ibo, subdiv_cache->num_subdiv_triangles * 3);
+
+ if (cache->tris_per_mat) {
+ for (int i = 0; i < cache->mat_len; i++) {
+ if (cache->tris_per_mat[i] == nullptr) {
+ cache->tris_per_mat[i] = GPU_indexbuf_calloc();
+ }
+
+ /* Multiply by 6 since we have 2 triangles per quad, with 3 indices each. */
+ const int start = subdiv_cache->mat_start[i] * 6;
+ const int len = (subdiv_cache->mat_end[i] - subdiv_cache->mat_start[i]) * 6;
+ GPU_indexbuf_create_subrange_in_place(cache->tris_per_mat[i], ibo, start, len);
+ }
+ }
+
+ draw_subdiv_build_tris_buffer(subdiv_cache, ibo, cache->mat_len);
+}
+
constexpr MeshExtract create_extractor_tris()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_tris_init;
+ extractor.init_subdiv = extract_tris_init_subdiv;
extractor.iter_poly_bm = extract_tris_iter_poly_bm;
extractor.iter_poly_mesh = extract_tris_iter_poly_mesh;
extractor.task_reduce = extract_tris_mat_task_reduce;
@@ -214,6 +243,7 @@ constexpr MeshExtract create_extractor_tris_single_mat()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_tris_single_mat_init;
+ extractor.init_subdiv = extract_tris_init_subdiv;
extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm;
extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh;
extractor.task_reduce = extract_tris_mat_task_reduce;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
index 8a5a8134ca7..ea702e5efdd 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc
@@ -32,6 +32,7 @@
#include "BKE_attribute.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -153,7 +154,9 @@ static GPUVertCompType get_comp_type_for_type(CustomDataType type)
static void init_vbo_for_attribute(const MeshRenderData *mr,
GPUVertBuf *vbo,
- const DRW_AttributeRequest &request)
+ const DRW_AttributeRequest &request,
+ bool build_on_device,
+ uint32_t len)
{
GPUVertCompType comp_type = get_comp_type_for_type(request.cd_type);
GPUVertFetchMode fetch_mode = get_fetch_mode_for_type(request.cd_type);
@@ -184,8 +187,13 @@ static void init_vbo_for_attribute(const MeshRenderData *mr,
}
}
- GPU_vertbuf_init_with_format(vbo, &format);
- GPU_vertbuf_data_alloc(vbo, static_cast<uint32_t>(mr->loop_len));
+ if (build_on_device) {
+ GPU_vertbuf_init_build_on_device(vbo, &format, len);
+ }
+ else {
+ GPU_vertbuf_init_with_format(vbo, &format);
+ GPU_vertbuf_data_alloc(vbo, len);
+ }
}
template<typename AttributeType, typename VBOType>
@@ -309,7 +317,7 @@ static void extract_attr_init(const MeshRenderData *mr,
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
- init_vbo_for_attribute(mr, vbo, request);
+ init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len));
/* TODO(kevindietrich) : float3 is used for scalar attributes as the implicit conversion done by
* OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following the
@@ -346,6 +354,68 @@ static void extract_attr_init(const MeshRenderData *mr,
}
}
+static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(tls_data),
+ int index)
+{
+ const DRW_MeshAttributes *attrs_used = &cache->attr_used;
+ const DRW_AttributeRequest &request = attrs_used->requests[index];
+
+ Mesh *coarse_mesh = subdiv_cache->mesh;
+
+ const uint32_t dimensions = gpu_component_size_for_attribute_type(request.cd_type);
+
+ /* Prepare VBO for coarse data. The compute shader only expects floats. */
+ GPUVertBuf *src_data = GPU_vertbuf_calloc();
+ static GPUVertFormat coarse_format = {0};
+ GPU_vertformat_attr_add(&coarse_format, "data", GPU_COMP_F32, dimensions, GPU_FETCH_FLOAT);
+ GPU_vertbuf_init_with_format_ex(src_data, &coarse_format, GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(src_data, static_cast<uint32_t>(coarse_mesh->totloop));
+
+ switch (request.cd_type) {
+ case CD_PROP_BOOL: {
+ extract_attr_generic<bool, float3>(mr, src_data, request);
+ break;
+ }
+ case CD_PROP_INT32: {
+ extract_attr_generic<int32_t, float3>(mr, src_data, request);
+ break;
+ }
+ case CD_PROP_FLOAT: {
+ extract_attr_generic<float, float3>(mr, src_data, request);
+ break;
+ }
+ case CD_PROP_FLOAT2: {
+ extract_attr_generic<float2>(mr, src_data, request);
+ break;
+ }
+ case CD_PROP_FLOAT3: {
+ extract_attr_generic<float3>(mr, src_data, request);
+ break;
+ }
+ case CD_PROP_COLOR: {
+ extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request);
+ break;
+ }
+ default: {
+ BLI_assert(false);
+ }
+ }
+
+ GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer);
+ init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops);
+
+ /* Ensure data is uploaded properly. */
+ GPU_vertbuf_tag_dirty(src_data);
+ draw_subdiv_interp_custom_data(
+ subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0);
+
+ GPU_vertbuf_discard(src_data);
+}
+
/* Wrappers around extract_attr_init so we can pass the index of the attribute that we want to
* extract. The overall API does not allow us to pass this in a convenient way. */
#define EXTRACT_INIT_WRAPPER(index) \
@@ -353,6 +423,14 @@ static void extract_attr_init(const MeshRenderData *mr,
const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \
{ \
extract_attr_init(mr, cache, buf, tls_data, index); \
+ } \
+ static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \
+ const MeshRenderData *mr, \
+ struct MeshBatchCache *cache, \
+ void *buf, \
+ void *tls_data) \
+ { \
+ extract_attr_init_subdiv(subdiv_cache, mr, cache, buf, tls_data, index); \
}
EXTRACT_INIT_WRAPPER(0)
@@ -371,10 +449,12 @@ EXTRACT_INIT_WRAPPER(12)
EXTRACT_INIT_WRAPPER(13)
EXTRACT_INIT_WRAPPER(14)
-template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn)
+template<int index>
+constexpr MeshExtract create_extractor_attr(ExtractInitFn fn, ExtractInitSubdivFn subdiv_fn)
{
MeshExtract extractor = {nullptr};
extractor.init = fn;
+ extractor.init_subdiv = subdiv_fn;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = 0;
extractor.use_threading = false;
@@ -388,7 +468,8 @@ template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn
extern "C" {
#define CREATE_EXTRACTOR_ATTR(index) \
- blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index)
+ blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index, \
+ blender::draw::extract_attr_init_subdiv##index)
const MeshExtract extract_attr[GPU_MAX_ATTR] = {
CREATE_EXTRACTOR_ATTR(0),
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
index 2e2444a8e3d..5ee34d7fdb2 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
@@ -25,6 +25,7 @@
#include "GPU_capabilities.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -216,6 +217,86 @@ static void extract_edge_fac_finish(const MeshRenderData *mr,
MEM_SAFE_FREE(data->edge_loop_count);
}
+/* Different function from the one used for the non-subdivision case, as we directly take care of
+ * the buggy AMD driver case. */
+static GPUVertFormat *get_subdiv_edge_fac_format()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ if (GPU_crappy_amd_driver()) {
+ GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ }
+ else {
+ GPU_vertformat_attr_add(&format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
+ }
+ }
+ return &format;
+}
+
+static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUVertBuf *edge_idx = cache->final.buff.vbo.edge_idx;
+ GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor;
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ GPU_vertbuf_init_build_on_device(
+ vbo, get_subdiv_edge_fac_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+
+ /* Create a temporary buffer for the edge original indices if they were not requested. */
+ const bool has_edge_idx = edge_idx != nullptr;
+ GPUVertBuf *loop_edge_idx = nullptr;
+ if (has_edge_idx) {
+ loop_edge_idx = edge_idx;
+ }
+ else {
+ loop_edge_idx = GPU_vertbuf_calloc();
+ draw_subdiv_init_origindex_buffer(
+ loop_edge_idx,
+ static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)),
+ subdiv_cache->num_subdiv_loops,
+ 0);
+ }
+
+ draw_subdiv_build_edge_fac_buffer(subdiv_cache, pos_nor, loop_edge_idx, vbo);
+
+ if (!has_edge_idx) {
+ GPU_vertbuf_discard(loop_edge_idx);
+ }
+}
+
+static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *UNUSED(data))
+{
+ if (loose_geom->edge_len == 0) {
+ return;
+ }
+
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+
+ /* Make sure buffer is active for sending loose data. */
+ GPU_vertbuf_use(vbo);
+
+ uint offset = subdiv_cache->num_subdiv_loops;
+ for (int i = 0; i < loose_geom->edge_len; i++) {
+ if (GPU_crappy_amd_driver()) {
+ float loose_edge_fac[2] = {1.0f, 1.0f};
+ GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac);
+ }
+ else {
+ char loose_edge_fac[2] = {255, 255};
+ GPU_vertbuf_update_sub(vbo, offset * sizeof(char), sizeof(loose_edge_fac), loose_edge_fac);
+ }
+
+ offset += 2;
+ }
+}
+
constexpr MeshExtract create_extractor_edge_fac()
{
MeshExtract extractor = {nullptr};
@@ -224,6 +305,8 @@ constexpr MeshExtract create_extractor_edge_fac()
extractor.iter_poly_mesh = extract_edge_fac_iter_poly_mesh;
extractor.iter_ledge_bm = extract_edge_fac_iter_ledge_bm;
extractor.iter_ledge_mesh = extract_edge_fac_iter_ledge_mesh;
+ extractor.init_subdiv = extract_edge_fac_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_edge_fac_loose_geom_subdiv;
extractor.finish = extract_edge_fac_finish;
extractor.data_type = MR_DATA_POLY_NOR;
extractor.data_size = sizeof(MeshExtract_EdgeFac_Data);
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
index 5232346e51e..eef64085c95 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc
@@ -25,6 +25,8 @@
#include "draw_cache_impl.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -107,19 +109,25 @@ static void mesh_render_data_vert_flag(const MeshRenderData *mr,
}
}
-static void extract_edit_data_init(const MeshRenderData *mr,
- struct MeshBatchCache *UNUSED(cache),
- void *buf,
- void *tls_data)
+static GPUVertFormat *get_edit_data_format(void)
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
static GPUVertFormat format = {0};
if (format.attr_len == 0) {
/* WARNING: Adjust #EditLoopData struct accordingly. */
GPU_vertformat_attr_add(&format, "data", GPU_COMP_U8, 4, GPU_FETCH_INT);
GPU_vertformat_alias_add(&format, "flag");
}
- GPU_vertbuf_init_with_format(vbo, &format);
+ return &format;
+}
+
+static void extract_edit_data_init(const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *tls_data)
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ GPUVertFormat *format = get_edit_data_format();
+ GPU_vertbuf_init_with_format(vbo, format);
GPU_vertbuf_data_alloc(vbo, mr->loop_len + mr->loop_loose_len);
EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo);
*(EditLoopData **)tls_data = vbo_data;
@@ -240,6 +248,80 @@ static void extract_edit_data_iter_lvert_mesh(const MeshRenderData *mr,
}
}
+static void extract_edit_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *data)
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ GPU_vertbuf_init_with_format(vbo, get_edit_data_format());
+ GPU_vertbuf_data_alloc(vbo, subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+ EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo);
+ *(EditLoopData **)data = vbo_data;
+}
+
+static void extract_edit_data_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *_data)
+{
+ EditLoopData *vbo_data = *(EditLoopData **)_data;
+ int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+ int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) {
+ const int vert_origindex = subdiv_loop_vert_index[i];
+ const int edge_origindex = subdiv_loop_edge_index[i];
+ const int poly_origindex = subdiv_loop_poly_index[i];
+
+ EditLoopData *edit_loop_data = &vbo_data[i];
+ memset(edit_loop_data, 0, sizeof(EditLoopData));
+
+ if (vert_origindex != -1) {
+ const BMVert *eve = bm_original_vert_get(mr, vert_origindex);
+ if (eve) {
+ mesh_render_data_vert_flag(mr, eve, edit_loop_data);
+ }
+ }
+
+ if (edge_origindex != -1) {
+ const BMEdge *eed = bm_original_edge_get(mr, edge_origindex);
+ if (eed) {
+ mesh_render_data_edge_flag(mr, eed, edit_loop_data);
+ }
+ }
+
+ BMFace *efa = bm_original_face_get(mr, poly_origindex);
+ /* The -1 parameter is for edit_uvs, which we don't do here. */
+ mesh_render_data_face_flag(mr, efa, -1, edit_loop_data);
+ }
+}
+
+static void extract_edit_data_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ const MeshExtractLooseGeom *loose_geom,
+ void *UNUSED(buffer),
+ void *_data)
+{
+ if (loose_geom->edge_len == 0) {
+ return;
+ }
+
+ EditLoopData *vbo_data = *(EditLoopData **)_data;
+
+ for (int ledge_index = 0; ledge_index < loose_geom->edge_len; ledge_index++) {
+ const int offset = subdiv_cache->num_subdiv_loops + ledge_index * 2;
+ EditLoopData *data = &vbo_data[offset];
+ memset(data, 0, sizeof(EditLoopData));
+ BMEdge *eed = bm_original_edge_get(mr, loose_geom->edges[ledge_index]);
+ mesh_render_data_edge_flag(mr, eed, &data[0]);
+ data[1] = data[0];
+ mesh_render_data_vert_flag(mr, eed->v1, &data[0]);
+ mesh_render_data_vert_flag(mr, eed->v2, &data[1]);
+ }
+}
+
constexpr MeshExtract create_extractor_edit_data()
{
MeshExtract extractor = {nullptr};
@@ -250,6 +332,9 @@ constexpr MeshExtract create_extractor_edit_data()
extractor.iter_ledge_mesh = extract_edit_data_iter_ledge_mesh;
extractor.iter_lvert_bm = extract_edit_data_iter_lvert_bm;
extractor.iter_lvert_mesh = extract_edit_data_iter_lvert_mesh;
+ extractor.init_subdiv = extract_edit_data_init_subdiv;
+ extractor.iter_subdiv = extract_edit_data_iter_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_edit_data_loose_geom_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(EditLoopData *);
extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
index b8494428eed..067d482bc2b 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc
@@ -25,6 +25,8 @@
#include "draw_cache_impl.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -36,12 +38,11 @@ struct MeshExtract_EditUVData_Data {
int cd_ofs;
};
-static void extract_edituv_data_init(const MeshRenderData *mr,
- struct MeshBatchCache *UNUSED(cache),
- void *buf,
- void *tls_data)
+static void extract_edituv_data_init_common(const MeshRenderData *mr,
+ GPUVertBuf *vbo,
+ MeshExtract_EditUVData_Data *data,
+ uint loop_len)
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
static GPUVertFormat format = {0};
if (format.attr_len == 0) {
/* WARNING: Adjust #EditLoopData struct accordingly. */
@@ -50,15 +51,23 @@ static void extract_edituv_data_init(const MeshRenderData *mr,
}
GPU_vertbuf_init_with_format(vbo, &format);
- GPU_vertbuf_data_alloc(vbo, mr->loop_len);
+ GPU_vertbuf_data_alloc(vbo, loop_len);
CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
-
- MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
data->vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo);
data->cd_ofs = CustomData_get_offset(cd_ldata, CD_MLOOPUV);
}
+static void extract_edituv_data_init(const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *tls_data)
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
+ extract_edituv_data_init_common(mr, vbo, data, mr->loop_len);
+}
+
static void extract_edituv_data_iter_poly_bm(const MeshRenderData *mr,
const BMFace *f,
const int UNUSED(f_index),
@@ -119,12 +128,54 @@ static void extract_edituv_data_iter_poly_mesh(const MeshRenderData *mr,
}
}
+static void extract_edituv_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *tls_data)
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data);
+ extract_edituv_data_init_common(mr, vbo, data, subdiv_cache->num_subdiv_loops);
+}
+
+static void extract_edituv_data_iter_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ void *_data)
+{
+ MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(_data);
+ int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index);
+ int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) {
+ const int vert_origindex = subdiv_loop_vert_index[i];
+ const int edge_origindex = subdiv_loop_edge_index[i];
+ const int poly_origindex = subdiv_loop_poly_index[i];
+
+ EditLoopData *edit_loop_data = &data->vbo_data[i];
+ memset(edit_loop_data, 0, sizeof(EditLoopData));
+
+ BMFace *efa = bm_original_face_get(mr, poly_origindex);
+
+ if (vert_origindex != -1 && edge_origindex != -1) {
+ BMEdge *eed = bm_original_edge_get(mr, edge_origindex);
+ /* Loop on an edge endpoint. */
+ BMLoop *l = BM_face_edge_share_loop(efa, eed);
+ mesh_render_data_loop_flag(mr, l, data->cd_ofs, edit_loop_data);
+ mesh_render_data_loop_edge_flag(mr, l, data->cd_ofs, edit_loop_data);
+ }
+ }
+}
+
constexpr MeshExtract create_extractor_edituv_data()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_edituv_data_init;
extractor.iter_poly_bm = extract_edituv_data_iter_poly_bm;
extractor.iter_poly_mesh = extract_edituv_data_iter_poly_mesh;
+ extractor.init_subdiv = extract_edituv_data_init_subdiv;
+ extractor.iter_subdiv = extract_edituv_data_iter_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_EditUVData_Data);
extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
index a947d98f955..0ea4ef5d5db 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc
@@ -27,6 +27,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -213,12 +215,69 @@ static void extract_edituv_stretch_angle_iter_poly_mesh(const MeshRenderData *mr
}
}
+static GPUVertFormat *get_edituv_stretch_angle_format_subdiv()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ /* WARNING: Adjust #UVStretchAngle struct accordingly. */
+ GPU_vertformat_attr_add(&format, "angle", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ GPU_vertformat_attr_add(&format, "uv_angles", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
+
+static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(tls_data))
+{
+ GPUVertBuf *refined_vbo = static_cast<GPUVertBuf *>(buffer);
+
+ GPU_vertbuf_init_build_on_device(
+ refined_vbo, get_edituv_stretch_angle_format_subdiv(), subdiv_cache->num_subdiv_loops);
+
+ GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor;
+ GPUVertBuf *uvs = cache->final.buff.vbo.uv;
+
+ /* UVs are stored contiguously so we need to compute the offset in the UVs buffer for the active
+ * UV layer. */
+ CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_MESH) ? &mr->me->ldata : &mr->bm->ldata;
+
+ uint32_t uv_layers = cache->cd_used.uv;
+ /* HACK to fix T68857 */
+ if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) {
+ int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
+ if (layer != -1) {
+ uv_layers |= (1 << layer);
+ }
+ }
+
+ int uvs_offset = 0;
+ for (int i = 0; i < MAX_MTFACE; i++) {
+ if (uv_layers & (1 << i)) {
+ if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) {
+ break;
+ }
+
+ uvs_offset += 1;
+ }
+ }
+
+ /* The data is at `offset * num loops`, and we have 2 values per index. */
+ uvs_offset *= subdiv_cache->num_subdiv_loops * 2;
+
+ draw_subdiv_build_edituv_stretch_angle_buffer(
+ subdiv_cache, pos_nor, uvs, uvs_offset, refined_vbo);
+}
+
constexpr MeshExtract create_extractor_edituv_edituv_stretch_angle()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_edituv_stretch_angle_init;
extractor.iter_poly_bm = extract_edituv_stretch_angle_iter_poly_bm;
extractor.iter_poly_mesh = extract_edituv_stretch_angle_iter_poly_mesh;
+ extractor.init_subdiv = extract_edituv_stretch_angle_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_StretchAngle_Data);
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
index 3db8cd79af5..3b40b3115f5 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc
@@ -27,6 +27,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -63,14 +65,12 @@ BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_t
return (ratio > 1.0f) ? (1.0f / ratio) : ratio;
}
-static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
- struct MeshBatchCache *cache,
- void *buf,
- void *UNUSED(data))
+static void compute_area_ratio(const MeshRenderData *mr,
+ float *r_area_ratio,
+ float &r_tot_area,
+ float &r_tot_uv_area)
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
float tot_area = 0.0f, tot_uv_area = 0.0f;
- float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__));
if (mr->extract_type == MR_EXTRACT_BMESH) {
CustomData *cd_ldata = &mr->bm->ldata;
@@ -84,7 +84,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
float uvarea = BM_face_calc_area_uv(efa, uv_ofs);
tot_area += area;
tot_uv_area += uvarea;
- area_ratio[f] = area_ratio_get(area, uvarea);
+ r_area_ratio[f] = area_ratio_get(area, uvarea);
}
}
else {
@@ -96,12 +96,22 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
float uvarea = BKE_mesh_calc_poly_uv_area(mp, uv_data);
tot_area += area;
tot_uv_area += uvarea;
- area_ratio[mp_index] = area_ratio_get(area, uvarea);
+ r_area_ratio[mp_index] = area_ratio_get(area, uvarea);
}
}
- cache->tot_area = tot_area;
- cache->tot_uv_area = tot_uv_area;
+ r_tot_area = tot_area;
+ r_tot_uv_area = tot_uv_area;
+}
+
+static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buf,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__));
+ compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area);
/* Convert in place to avoid an extra allocation */
uint16_t *poly_stretch = (uint16_t *)area_ratio;
@@ -135,11 +145,46 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr,
MEM_freeN(area_ratio);
}
+static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ /* Initialize the final buffer. */
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ }
+
+ GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops);
+
+ /* Initialize coarse data buffer. */
+
+ GPUVertBuf *coarse_data = GPU_vertbuf_calloc();
+
+ /* Use the same format since we just copy data around. */
+ GPU_vertbuf_init_with_format(coarse_data, &format);
+ GPU_vertbuf_data_alloc(coarse_data, mr->loop_len);
+
+ compute_area_ratio(mr,
+ static_cast<float *>(GPU_vertbuf_get_data(coarse_data)),
+ cache->tot_area,
+ cache->tot_uv_area);
+
+ draw_subdiv_build_edituv_stretch_area_buffer(subdiv_cache, coarse_data, vbo);
+
+ GPU_vertbuf_discard(coarse_data);
+}
+
constexpr MeshExtract create_extractor_edituv_stretch_area()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_edituv_stretch_area_init;
extractor.finish = extract_edituv_stretch_area_finish;
+ extractor.init_subdiv = extract_edituv_stretch_area_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = 0;
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
index 33f9180e122..f65159f9b95 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc
@@ -23,24 +23,40 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
/** \name Extract Face-dots positions
* \{ */
-static void extract_fdots_pos_init(const MeshRenderData *mr,
- struct MeshBatchCache *UNUSED(cache),
- void *buf,
- void *tls_data)
+static GPUVertFormat *get_fdots_pos_format()
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
static GPUVertFormat format = {0};
if (format.attr_len == 0) {
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
}
+ return &format;
+}
+
+static GPUVertFormat *get_fdots_nor_format_subdiv()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "norAndFlag", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
- GPU_vertbuf_init_with_format(vbo, &format);
+static void extract_fdots_pos_init(const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *tls_data)
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ GPUVertFormat *format = get_fdots_pos_format();
+ GPU_vertbuf_init_with_format(vbo, format);
GPU_vertbuf_data_alloc(vbo, mr->poly_len);
void *vbo_data = GPU_vertbuf_get_data(vbo);
*(float(**)[3])tls_data = static_cast<float(*)[3]>(vbo_data);
@@ -97,10 +113,30 @@ static void extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr,
}
}
+static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ /* We "extract" positions, normals, and indices at once. */
+ GPUVertBuf *fdots_pos_vbo = static_cast<GPUVertBuf *>(buffer);
+ GPUVertBuf *fdots_nor_vbo = cache->final.buff.vbo.fdots_nor;
+ GPUIndexBuf *fdots_pos_ibo = cache->final.buff.ibo.fdots;
+
+ GPU_vertbuf_init_build_on_device(
+ fdots_nor_vbo, get_fdots_nor_format_subdiv(), subdiv_cache->num_coarse_poly);
+ GPU_vertbuf_init_build_on_device(
+ fdots_pos_vbo, get_fdots_pos_format(), subdiv_cache->num_coarse_poly);
+ GPU_indexbuf_init_build_on_device(fdots_pos_ibo, subdiv_cache->num_coarse_poly);
+ draw_subdiv_build_fdots_buffers(subdiv_cache, fdots_pos_vbo, fdots_nor_vbo, fdots_pos_ibo);
+}
+
constexpr MeshExtract create_extractor_fdots_pos()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_fdots_pos_init;
+ extractor.init_subdiv = extract_fdots_init_subdiv;
extractor.iter_poly_bm = extract_fdots_pos_iter_poly_bm;
extractor.iter_poly_mesh = extract_fdots_pos_iter_poly_mesh;
extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
index 3c3ac7a7a0a..d30c38ef050 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc
@@ -23,6 +23,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -107,10 +109,34 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr,
}
}
+static GPUVertFormat *get_subdiv_lnor_format()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ GPU_vertformat_alias_add(&format, "lnor");
+ }
+ return &format;
+}
+
+static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor;
+ BLI_assert(pos_nor);
+ GPU_vertbuf_init_build_on_device(vbo, get_subdiv_lnor_format(), subdiv_cache->num_subdiv_loops);
+ draw_subdiv_build_lnor_buffer(subdiv_cache, pos_nor, vbo);
+}
+
constexpr MeshExtract create_extractor_lnor()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_lnor_init;
+ extractor.init_subdiv = extract_lnor_init_subdiv;
extractor.iter_poly_bm = extract_lnor_iter_poly_bm;
extractor.iter_poly_mesh = extract_lnor_iter_poly_mesh;
extractor.data_type = MR_DATA_LOOP_NOR;
@@ -210,6 +236,7 @@ constexpr MeshExtract create_extractor_lnor_hq()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_lnor_hq_init;
+ extractor.init_subdiv = extract_lnor_init_subdiv;
extractor.iter_poly_bm = extract_lnor_hq_iter_poly_bm;
extractor.iter_poly_mesh = extract_lnor_hq_iter_poly_mesh;
extractor.data_type = MR_DATA_LOOP_NOR;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
index eb9a138590c..00ed4ca6359 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc
@@ -25,6 +25,8 @@
#include "extract_mesh.h"
+#include "draw_subdivision.h"
+
namespace blender::draw {
/* ---------------------------------------------------------------------- */
@@ -194,6 +196,123 @@ static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr),
MEM_freeN(data->normals);
}
+static GPUVertFormat *get_pos_nor_format()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ GPU_vertformat_alias_add(&format, "vnor");
+ }
+ return &format;
+}
+
+static GPUVertFormat *get_normals_format()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ GPU_vertformat_alias_add(&format, "lnor");
+ }
+ return &format;
+}
+
+static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ const bool do_limit_normals = subdiv_cache->do_limit_normals;
+
+ /* Initialize the vertex buffer; it was already allocated. */
+ GPU_vertbuf_init_build_on_device(
+ vbo, get_pos_nor_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len);
+
+ draw_subdiv_extract_pos_nor(subdiv_cache, vbo, do_limit_normals);
+
+ if (!do_limit_normals) {
+ /* We cannot evaluate vertex normals using the limit surface, so compute them manually. */
+ GPUVertBuf *subdiv_loop_subdiv_vert_index = draw_subdiv_build_origindex_buffer(
+ subdiv_cache->subdiv_loop_subdiv_vert_index, subdiv_cache->num_subdiv_loops);
+
+ GPUVertBuf *vertex_normals = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_build_on_device(
+ vertex_normals, get_normals_format(), subdiv_cache->num_subdiv_verts);
+
+ draw_subdiv_accumulate_normals(subdiv_cache,
+ vbo,
+ subdiv_cache->subdiv_vertex_face_adjacency_offsets,
+ subdiv_cache->subdiv_vertex_face_adjacency,
+ vertex_normals);
+
+ draw_subdiv_finalize_normals(subdiv_cache, vertex_normals, subdiv_loop_subdiv_vert_index, vbo);
+
+ GPU_vertbuf_discard(vertex_normals);
+ GPU_vertbuf_discard(subdiv_loop_subdiv_vert_index);
+ }
+}
+
+static void extract_pos_nor_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *UNUSED(data))
+{
+ const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len;
+ if (loop_loose_len == 0) {
+ return;
+ }
+
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ const Mesh *coarse_mesh = subdiv_cache->mesh;
+ const MEdge *coarse_edges = coarse_mesh->medge;
+ const MVert *coarse_verts = coarse_mesh->mvert;
+ uint offset = subdiv_cache->num_subdiv_loops;
+
+ /* TODO(kevindietrich): replace this when compressed normals are supported. */
+ struct SubdivPosNorLoop {
+ float pos[3];
+ float nor[3];
+ float flag;
+ };
+
+ SubdivPosNorLoop edge_data[2];
+ for (int i = 0; i < loose_geom->edge_len; i++) {
+ const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]];
+ const MVert *loose_vert1 = &coarse_verts[loose_edge->v1];
+ const MVert *loose_vert2 = &coarse_verts[loose_edge->v2];
+
+ copy_v3_v3(edge_data[0].pos, loose_vert1->co);
+ normal_short_to_float_v3(edge_data[0].nor, loose_vert1->no);
+ edge_data[0].flag = 0.0f;
+
+ copy_v3_v3(edge_data[1].pos, loose_vert2->co);
+ normal_short_to_float_v3(edge_data[1].nor, loose_vert2->no);
+ edge_data[1].flag = 0.0f;
+
+ GPU_vertbuf_update_sub(
+ vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop) * 2, &edge_data);
+
+ offset += 2;
+ }
+
+ SubdivPosNorLoop vert_data;
+ vert_data.flag = 0.0f;
+ for (int i = 0; i < loose_geom->vert_len; i++) {
+ const MVert *loose_vertex = &coarse_verts[loose_geom->verts[i]];
+
+ copy_v3_v3(vert_data.pos, loose_vertex->co);
+ normal_short_to_float_v3(vert_data.nor, loose_vertex->no);
+
+ GPU_vertbuf_update_sub(
+ vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop), &vert_data);
+
+ offset += 1;
+ }
+}
+
constexpr MeshExtract create_extractor_pos_nor()
{
MeshExtract extractor = {nullptr};
@@ -205,6 +324,8 @@ constexpr MeshExtract create_extractor_pos_nor()
extractor.iter_lvert_bm = extract_pos_nor_iter_lvert_bm;
extractor.iter_lvert_mesh = extract_pos_nor_iter_lvert_mesh;
extractor.finish = extract_pos_nor_finish;
+ extractor.init_subdiv = extract_pos_nor_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_pos_nor_loose_geom_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(MeshExtract_PosNor_Data);
extractor.use_threading = true;
@@ -391,6 +512,7 @@ constexpr MeshExtract create_extractor_pos_nor_hq()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_pos_nor_hq_init;
+ extractor.init_subdiv = extract_pos_nor_init_subdiv;
extractor.iter_poly_bm = extract_pos_nor_hq_iter_poly_bm;
extractor.iter_poly_mesh = extract_pos_nor_hq_iter_poly_mesh;
extractor.iter_ledge_bm = extract_pos_nor_hq_iter_ledge_bm;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
index fd91bc5258f..753fbe7e0e2 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc
@@ -27,6 +27,7 @@
#include "BKE_paint.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -35,13 +36,23 @@ namespace blender::draw {
/** \name Extract Sculpt Data
* \{ */
+static GPUVertFormat *get_sculpt_data_format()
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+ GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ }
+ return &format;
+}
+
static void extract_sculpt_data_init(const MeshRenderData *mr,
struct MeshBatchCache *UNUSED(cache),
void *buf,
void *UNUSED(tls_data))
{
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
- GPUVertFormat format = {0};
+ GPUVertFormat *format = get_sculpt_data_format();
CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->vdata : &mr->me->vdata;
@@ -50,12 +61,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr,
float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK);
int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS);
- if (format.attr_len == 0) {
- GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
- GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
- }
-
- GPU_vertbuf_init_with_format(vbo, &format);
+ GPU_vertbuf_init_with_format(vbo, format);
GPU_vertbuf_data_alloc(vbo, mr->loop_len);
struct gpuSculptData {
@@ -121,10 +127,99 @@ static void extract_sculpt_data_init(const MeshRenderData *mr,
}
}
+static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ struct MeshBatchCache *UNUSED(cache),
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+
+ Mesh *coarse_mesh = mr->me;
+ CustomData *cd_vdata = &coarse_mesh->vdata;
+ CustomData *cd_pdata = &coarse_mesh->pdata;
+
+ /* First, interpolate mask if available. */
+ GPUVertBuf *mask_vbo = nullptr;
+ GPUVertBuf *subdiv_mask_vbo = nullptr;
+ float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK);
+
+ if (cd_mask) {
+ GPUVertFormat mask_format = {0};
+ GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+
+ mask_vbo = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format(mask_vbo, &mask_format);
+ GPU_vertbuf_data_alloc(mask_vbo, coarse_mesh->totloop);
+ float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo));
+
+ for (int i = 0; i < coarse_mesh->totpoly; i++) {
+ const MPoly *mpoly = &coarse_mesh->mpoly[i];
+
+ for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop;
+ loop_index++) {
+ const MLoop *ml = &coarse_mesh->mloop[loop_index];
+ *v_mask++ = cd_mask[ml->v];
+ }
+ }
+
+ subdiv_mask_vbo = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_build_on_device(
+ subdiv_mask_vbo, &mask_format, subdiv_cache->num_subdiv_loops);
+
+ draw_subdiv_interp_custom_data(subdiv_cache, mask_vbo, subdiv_mask_vbo, 1, 0);
+ }
+
+ /* Then, gather face sets. */
+ GPUVertFormat face_set_format = {0};
+ GPU_vertformat_attr_add(&face_set_format, "msk", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+
+ GPUVertBuf *face_set_vbo = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format(face_set_vbo, &face_set_format);
+ GPU_vertbuf_data_alloc(face_set_vbo, subdiv_cache->num_subdiv_loops);
+
+ struct gpuFaceSet {
+ uint8_t color[4];
+ };
+
+ gpuFaceSet *face_sets = (gpuFaceSet *)GPU_vertbuf_get_data(face_set_vbo);
+ int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS);
+
+ GPUVertFormat *format = get_sculpt_data_format();
+ GPU_vertbuf_init_build_on_device(vbo, format, subdiv_cache->num_subdiv_loops);
+ int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index;
+
+ for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) {
+ const int mp_index = subdiv_loop_poly_index[i];
+
+ uchar face_set_color[4] = {UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX};
+ if (cd_face_set) {
+ const int face_set_id = cd_face_set[mp_index];
+ /* Skip the default color Face Set so that it is rendered white. */
+ if (face_set_id != coarse_mesh->face_sets_color_default) {
+ BKE_paint_face_set_overlay_color_get(
+ face_set_id, coarse_mesh->face_sets_color_seed, face_set_color);
+ }
+ }
+ copy_v3_v3_uchar(face_sets->color, face_set_color);
+ face_sets++;
+ }
+
+ /* Finally, interleave mask and face sets. */
+ draw_subdiv_build_sculpt_data_buffer(subdiv_cache, subdiv_mask_vbo, face_set_vbo, vbo);
+
+ if (mask_vbo) {
+ GPU_vertbuf_discard(mask_vbo);
+ GPU_vertbuf_discard(subdiv_mask_vbo);
+ }
+ GPU_vertbuf_discard(face_set_vbo);
+}
+
constexpr MeshExtract create_extractor_sculpt_data()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_sculpt_data_init;
+ extractor.init_subdiv = extract_sculpt_data_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = 0;
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
index 5ac30dd3be9..33c27b45627 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc
@@ -21,6 +21,7 @@
* \ingroup draw
*/
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -196,12 +197,104 @@ static void extract_vert_idx_iter_lvert_mesh(const MeshRenderData *mr,
(*(uint32_t **)data)[offset + lvert_index] = v_orig;
}
+static void extract_vert_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ /* Each element points to an element in `ibo.points`. */
+ draw_subdiv_init_origindex_buffer(vbo,
+ subdiv_cache->subdiv_loop_subdiv_vert_index,
+ subdiv_cache->num_subdiv_loops,
+ mr->loop_loose_len);
+}
+
+static void extract_vert_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *UNUSED(data))
+{
+ const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len;
+ if (loop_loose_len == 0) {
+ return;
+ }
+
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo);
+ const Mesh *coarse_mesh = subdiv_cache->mesh;
+ const MEdge *coarse_edges = coarse_mesh->medge;
+ uint offset = subdiv_cache->num_subdiv_loops;
+
+ for (int i = 0; i < loose_geom->edge_len; i++) {
+ const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]];
+ vert_idx_data[offset] = loose_edge->v1;
+ vert_idx_data[offset + 1] = loose_edge->v2;
+ offset += 2;
+ }
+
+ for (int i = 0; i < loose_geom->vert_len; i++) {
+ vert_idx_data[offset] = loose_geom->verts[i];
+ offset += 1;
+ }
+}
+
+static void extract_edge_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *mr,
+ MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ draw_subdiv_init_origindex_buffer(
+ vbo,
+ static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)),
+ subdiv_cache->num_subdiv_loops,
+ mr->edge_loose_len * 2);
+}
+
+static void extract_edge_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ const MeshExtractLooseGeom *loose_geom,
+ void *buffer,
+ void *UNUSED(data))
+{
+ const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len;
+ if (loop_loose_len == 0) {
+ return;
+ }
+
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo);
+ uint offset = subdiv_cache->num_subdiv_loops;
+
+ for (int i = 0; i < loose_geom->edge_len; i++) {
+ vert_idx_data[offset] = loose_geom->edges[i];
+ vert_idx_data[offset + 1] = loose_geom->edges[i];
+ offset += 2;
+ }
+}
+
+static void extract_poly_idx_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ MeshBatchCache *UNUSED(cache),
+ void *buf,
+ void *UNUSED(data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ draw_subdiv_init_origindex_buffer(
+ vbo, subdiv_cache->subdiv_loop_poly_index, subdiv_cache->num_subdiv_loops, 0);
+}
+
constexpr MeshExtract create_extractor_poly_idx()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_select_idx_init;
extractor.iter_poly_bm = extract_poly_idx_iter_poly_bm;
extractor.iter_poly_mesh = extract_poly_idx_iter_poly_mesh;
+ extractor.init_subdiv = extract_poly_idx_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(uint32_t *);
extractor.use_threading = true;
@@ -217,6 +310,8 @@ constexpr MeshExtract create_extractor_edge_idx()
extractor.iter_poly_mesh = extract_edge_idx_iter_poly_mesh;
extractor.iter_ledge_bm = extract_edge_idx_iter_ledge_bm;
extractor.iter_ledge_mesh = extract_edge_idx_iter_ledge_mesh;
+ extractor.init_subdiv = extract_edge_idx_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_edge_idx_loose_geom_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(uint32_t *);
extractor.use_threading = true;
@@ -234,6 +329,8 @@ constexpr MeshExtract create_extractor_vert_idx()
extractor.iter_ledge_mesh = extract_vert_idx_iter_ledge_mesh;
extractor.iter_lvert_bm = extract_vert_idx_iter_lvert_bm;
extractor.iter_lvert_mesh = extract_vert_idx_iter_lvert_mesh;
+ extractor.init_subdiv = extract_vert_idx_init_subdiv;
+ extractor.iter_loose_geom_subdiv = extract_vert_idx_loose_geom_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(uint32_t *);
extractor.use_threading = true;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
index af279b08a59..6e9d8ef6926 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc
@@ -23,6 +23,7 @@
#include "BLI_string.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -31,25 +32,27 @@ namespace blender::draw {
/** \name Extract UV layers
* \{ */
-static void extract_uv_init(const MeshRenderData *mr,
- struct MeshBatchCache *cache,
- void *buf,
- void *UNUSED(tls_data))
+/* Initialize the vertex format to be used for UVs. Return true if any UV layer is
+ * found, false otherwise. */
+static bool mesh_extract_uv_format_init(GPUVertFormat *format,
+ struct MeshBatchCache *cache,
+ CustomData *cd_ldata,
+ eMRExtractType extract_type,
+ uint32_t &r_uv_layers)
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
- GPUVertFormat format = {0};
- GPU_vertformat_deinterleave(&format);
+ GPU_vertformat_deinterleave(format);
- CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
uint32_t uv_layers = cache->cd_used.uv;
/* HACK to fix T68857 */
- if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) {
+ if (extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) {
int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
if (layer != -1) {
uv_layers |= (1 << layer);
}
}
+ r_uv_layers = uv_layers;
+
for (int i = 0; i < MAX_MTFACE; i++) {
if (uv_layers & (1 << i)) {
char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
@@ -58,30 +61,47 @@ static void extract_uv_init(const MeshRenderData *mr,
GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
/* UV layer name. */
BLI_snprintf(attr_name, sizeof(attr_name), "u%s", attr_safe_name);
- GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+ GPU_vertformat_attr_add(format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
/* Auto layer name. */
BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name);
- GPU_vertformat_alias_add(&format, attr_name);
+ GPU_vertformat_alias_add(format, attr_name);
/* Active render layer name. */
if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPUV)) {
- GPU_vertformat_alias_add(&format, "u");
+ GPU_vertformat_alias_add(format, "u");
}
/* Active display layer name. */
if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) {
- GPU_vertformat_alias_add(&format, "au");
+ GPU_vertformat_alias_add(format, "au");
/* Alias to `pos` for edit uvs. */
- GPU_vertformat_alias_add(&format, "pos");
+ GPU_vertformat_alias_add(format, "pos");
}
/* Stencil mask uv layer name. */
if (i == CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV)) {
- GPU_vertformat_alias_add(&format, "mu");
+ GPU_vertformat_alias_add(format, "mu");
}
}
}
+ if (format->attr_len == 0) {
+ GPU_vertformat_attr_add(format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ return false;
+ }
+
+ return true;
+}
+
+static void extract_uv_init(const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buf,
+ void *UNUSED(tls_data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ GPUVertFormat format = {0};
+
+ CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
int v_len = mr->loop_len;
- if (format.attr_len == 0) {
- GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ uint32_t uv_layers = cache->cd_used.uv;
+ if (!mesh_extract_uv_format_init(&format, cache, cd_ldata, mr->extract_type, uv_layers)) {
/* VBO will not be used, only allocate minimum of memory. */
v_len = 1;
}
@@ -116,10 +136,45 @@ static void extract_uv_init(const MeshRenderData *mr,
}
}
+static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ Mesh *coarse_mesh = subdiv_cache->mesh;
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+ GPUVertFormat format = {0};
+
+ uint v_len = subdiv_cache->num_subdiv_loops;
+ uint uv_layers;
+ if (!mesh_extract_uv_format_init(
+ &format, cache, &coarse_mesh->ldata, MR_EXTRACT_MESH, uv_layers)) {
+ /* TODO(kevindietrich): handle this more gracefully. */
+ v_len = 1;
+ }
+
+ GPU_vertbuf_init_build_on_device(vbo, &format, v_len);
+
+ if (uv_layers == 0) {
+ return;
+ }
+
+ /* Index of the UV layer in the compact buffer. Used UV layers are stored in a single buffer. */
+ int pack_layer_index = 0;
+ for (int i = 0; i < MAX_MTFACE; i++) {
+ if (uv_layers & (1 << i)) {
+ const int offset = (int)subdiv_cache->num_subdiv_loops * pack_layer_index++;
+ draw_subdiv_extract_uvs(subdiv_cache, vbo, i, offset);
+ }
+ }
+}
+
constexpr MeshExtract create_extractor_uv()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_uv_init;
+ extractor.init_subdiv = extract_uv_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = 0;
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
index f8878eb2617..ea7810bcf6b 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc
@@ -25,6 +25,7 @@
#include "BLI_string.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -33,17 +34,14 @@ namespace blender::draw {
/** \name Extract VCol
* \{ */
-static void extract_vcol_init(const MeshRenderData *mr,
- struct MeshBatchCache *cache,
- void *buf,
- void *UNUSED(tls_data))
+/* Initialize the common vertex color format shared by the coarse and subdivided meshes. */
+static void init_vcol_format(GPUVertFormat *format,
+ const MeshBatchCache *cache,
+ CustomData *cd_ldata)
{
- GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
- GPUVertFormat format = {0};
- GPU_vertformat_deinterleave(&format);
+ GPU_vertformat_deinterleave(format);
- CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
- uint32_t vcol_layers = cache->cd_used.vcol;
+ const uint32_t vcol_layers = cache->cd_used.vcol;
for (int i = 0; i < MAX_MCOL; i++) {
if (vcol_layers & (1 << i)) {
@@ -52,31 +50,56 @@ static void extract_vcol_init(const MeshRenderData *mr,
GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
BLI_snprintf(attr_name, sizeof(attr_name), "c%s", attr_safe_name);
- GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
+ GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPCOL)) {
- GPU_vertformat_alias_add(&format, "c");
+ GPU_vertformat_alias_add(format, "c");
}
if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL)) {
- GPU_vertformat_alias_add(&format, "ac");
+ GPU_vertformat_alias_add(format, "ac");
}
/* Gather number of auto layers. */
/* We only do `vcols` that are not overridden by `uvs`. */
if (CustomData_get_named_layer_index(cd_ldata, CD_MLOOPUV, layer_name) == -1) {
BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name);
- GPU_vertformat_alias_add(&format, attr_name);
+ GPU_vertformat_alias_add(format, attr_name);
}
}
}
+}
+
+/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision
+ * case. */
+static GPUVertFormat *get_coarse_vcol_format(void)
+{
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ GPU_vertformat_alias_add(&format, "c");
+ GPU_vertformat_alias_add(&format, "ac");
+ }
+ return &format;
+}
+
+using gpuMeshVcol = struct gpuMeshVcol {
+ ushort r, g, b, a;
+};
+
+static void extract_vcol_init(const MeshRenderData *mr,
+ struct MeshBatchCache *cache,
+ void *buf,
+ void *UNUSED(tls_data))
+{
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
+ GPUVertFormat format = {0};
+ CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
+ const uint32_t vcol_layers = cache->cd_used.vcol;
+ init_vcol_format(&format, cache, cd_ldata);
GPU_vertbuf_init_with_format(vbo, &format);
GPU_vertbuf_data_alloc(vbo, mr->loop_len);
- using gpuMeshVcol = struct gpuMeshVcol {
- ushort r, g, b, a;
- };
-
gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo);
for (int i = 0; i < MAX_MCOL; i++) {
@@ -111,10 +134,64 @@ static void extract_vcol_init(const MeshRenderData *mr,
}
}
+static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer);
+ Mesh *coarse_mesh = subdiv_cache->mesh;
+
+ GPUVertFormat format = {0};
+ init_vcol_format(&format, cache, &coarse_mesh->ldata);
+
+ GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops);
+
+ GPUVertBuf *src_data = GPU_vertbuf_calloc();
+ /* Dynamic as we upload and interpolate layers one at a time. */
+ GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC);
+
+ GPU_vertbuf_data_alloc(src_data, coarse_mesh->totloop);
+
+ gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data);
+
+ const CustomData *cd_ldata = &coarse_mesh->ldata;
+
+ const uint vcol_layers = cache->cd_used.vcol;
+
+ /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in
+ * a single buffer. */
+ int pack_layer_index = 0;
+ for (int i = 0; i < MAX_MTFACE; i++) {
+ if (vcol_layers & (1 << i)) {
+ /* Include the stride in the offset; we use a stride of 2 since colors are packed into 2 uints. */
+ const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++;
+ const MLoopCol *mloopcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i);
+
+ gpuMeshVcol *vcol = mesh_vcol;
+
+ for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, mloopcol++) {
+ vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]);
+ vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]);
+ vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]);
+ vcol->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f));
+ }
+
+ /* Ensure data is uploaded properly. */
+ GPU_vertbuf_tag_dirty(src_data);
+ draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset);
+ }
+ }
+
+ GPU_vertbuf_discard(src_data);
+}
+
constexpr MeshExtract create_extractor_vcol()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_vcol_init;
+ extractor.init_subdiv = extract_vcol_init_subdiv;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = 0;
extractor.use_threading = false;
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
index bdb1410a755..bb8853b8154 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc
@@ -25,6 +25,7 @@
#include "BKE_deform.h"
+#include "draw_subdivision.h"
#include "extract_mesh.h"
namespace blender::draw {
@@ -167,10 +168,57 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr,
}
}
+static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache,
+ const MeshRenderData *UNUSED(mr),
+ struct MeshBatchCache *cache,
+ void *buffer,
+ void *UNUSED(data))
+{
+ Mesh *coarse_mesh = subdiv_cache->mesh;
+ GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer);
+
+ static GPUVertFormat format = {0};
+ if (format.attr_len == 0) {
+ GPU_vertformat_attr_add(&format, "weight", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+ }
+ GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops);
+
+ GPUVertBuf *coarse_weights = GPU_vertbuf_calloc();
+ GPU_vertbuf_init_with_format(coarse_weights, &format);
+ GPU_vertbuf_data_alloc(coarse_weights, coarse_mesh->totloop);
+ float *coarse_weights_data = static_cast<float *>(GPU_vertbuf_get_data(coarse_weights));
+
+ const DRW_MeshWeightState *wstate = &cache->weight_state;
+ const MDeformVert *dverts = static_cast<const MDeformVert *>(
+ CustomData_get_layer(&coarse_mesh->vdata, CD_MDEFORMVERT));
+
+ for (int i = 0; i < coarse_mesh->totpoly; i++) {
+ const MPoly *mpoly = &coarse_mesh->mpoly[i];
+
+ for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop;
+ loop_index++) {
+ const MLoop *ml = &coarse_mesh->mloop[loop_index];
+
+ if (dverts != nullptr) {
+ const MDeformVert *dvert = &dverts[ml->v];
+ coarse_weights_data[loop_index] = evaluate_vertex_weight(dvert, wstate);
+ }
+ else {
+ coarse_weights_data[loop_index] = evaluate_vertex_weight(nullptr, wstate);
+ }
+ }
+ }
+
+ draw_subdiv_interp_custom_data(subdiv_cache, coarse_weights, vbo, 1, 0);
+
+ GPU_vertbuf_discard(coarse_weights);
+}
+
constexpr MeshExtract create_extractor_weights()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_weights_init;
+ extractor.init_subdiv = extract_weights_init_subdiv;
extractor.iter_poly_bm = extract_weights_iter_poly_bm;
extractor.iter_poly_mesh = extract_weights_iter_poly_mesh;
extractor.data_type = MR_DATA_NONE;
diff --git a/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
new file mode 100644
index 00000000000..36c3970d9a0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
@@ -0,0 +1,230 @@
+
+/* To be compiled with common_subdiv_lib.glsl. */
+
+layout(std430, binding = 1) readonly restrict buffer sourceBuffer
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint src_data[];
+#else
+ float src_data[];
+#endif
+};
+
+layout(std430, binding = 2) readonly restrict buffer facePTexOffset
+{
+ uint face_ptex_offset[];
+};
+
+layout(std430, binding = 3) readonly restrict buffer patchCoords
+{
+ BlenderPatchCoord patch_coords[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[];
+};
+
+layout(std430, binding = 5) writeonly restrict buffer destBuffer
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint dst_data[];
+#else
+ float dst_data[];
+#endif
+};
+
+struct Vertex {
+ float vertex_data[DIMENSIONS];
+};
+
+void clear(inout Vertex v)
+{
+ for (int i = 0; i < DIMENSIONS; i++) {
+ v.vertex_data[i] = 0.0;
+ }
+}
+
+Vertex read_vertex(uint index)
+{
+ Vertex result;
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint base_index = index * 2;
+ if (DIMENSIONS == 4) {
+ uint xy = src_data[base_index];
+ uint zw = src_data[base_index + 1];
+
+ float x = float((xy >> 16) & 0xffff) / 65535.0;
+ float y = float(xy & 0xffff) / 65535.0;
+ float z = float((zw >> 16) & 0xffff) / 65535.0;
+ float w = float(zw & 0xffff) / 65535.0;
+
+ result.vertex_data[0] = x;
+ result.vertex_data[1] = y;
+ result.vertex_data[2] = z;
+ result.vertex_data[3] = w;
+ }
+ else {
+ /* This case is unsupported for now. */
+ clear(result);
+ }
+#else
+ uint base_index = index * DIMENSIONS;
+ for (int i = 0; i < DIMENSIONS; i++) {
+ result.vertex_data[i] = src_data[base_index + i];
+ }
+#endif
+ return result;
+}
+
+void write_vertex(uint index, Vertex v)
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint base_index = dst_offset + index * 2;
+ if (DIMENSIONS == 4) {
+ uint x = uint(v.vertex_data[0] * 65535.0);
+ uint y = uint(v.vertex_data[1] * 65535.0);
+ uint z = uint(v.vertex_data[2] * 65535.0);
+ uint w = uint(v.vertex_data[3] * 65535.0);
+
+ uint xy = x << 16 | y;
+ uint zw = z << 16 | w;
+
+ dst_data[base_index] = xy;
+ dst_data[base_index + 1] = zw;
+ }
+ else {
+ /* This case is unsupported for now. */
+ dst_data[base_index] = 0;
+ }
+#else
+ uint base_index = dst_offset + index * DIMENSIONS;
+ for (int i = 0; i < DIMENSIONS; i++) {
+ dst_data[base_index + i] = v.vertex_data[i];
+ }
+#endif
+}
+
+Vertex interp_vertex(Vertex v0, Vertex v1, Vertex v2, Vertex v3, vec2 uv)
+{
+ Vertex result;
+ for (int i = 0; i < DIMENSIONS; i++) {
+ float e = mix(v0.vertex_data[i], v1.vertex_data[i], uv.x);
+ float f = mix(v2.vertex_data[i], v3.vertex_data[i], uv.x);
+ result.vertex_data[i] = mix(e, f, uv.y);
+ }
+ return result;
+}
+
+void add_with_weight(inout Vertex v0, Vertex v1, float weight)
+{
+ for (int i = 0; i < DIMENSIONS; i++) {
+ v0.vertex_data[i] += v1.vertex_data[i] * weight;
+ }
+}
+
+Vertex average(Vertex v0, Vertex v1)
+{
+ Vertex result;
+ for (int i = 0; i < DIMENSIONS; i++) {
+ result.vertex_data[i] = (v0.vertex_data[i] + v1.vertex_data[i]) * 0.5;
+ }
+ return result;
+}
+
+uint get_vertex_count(uint coarse_polygon)
+{
+ uint number_of_patches = face_ptex_offset[coarse_polygon + 1] - face_ptex_offset[coarse_polygon];
+ if (number_of_patches == 1) {
+ /* If there is only one patch for the current coarse polygon, then it is a quad. */
+ return 4;
+ }
+ /* Otherwise, the number of patches is the number of vertices. */
+ return number_of_patches;
+}
+
+uint get_polygon_corner_index(uint coarse_polygon, uint patch_index)
+{
+ uint patch_offset = face_ptex_offset[coarse_polygon];
+ return patch_index - patch_offset;
+}
+
+uint get_loop_start(uint coarse_polygon)
+{
+ return extra_coarse_face_data[coarse_polygon] & coarse_face_loopstart_mask;
+}
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ /* Find which coarse polygon we came from. */
+ uint coarse_polygon = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count);
+ uint loop_start = get_loop_start(coarse_polygon);
+
+ /* Find the number of vertices for the coarse polygon. */
+ Vertex v0, v1, v2, v3;
+ clear(v0);
+ clear(v1);
+ clear(v2);
+ clear(v3);
+
+ uint number_of_vertices = get_vertex_count(coarse_polygon);
+ if (number_of_vertices == 4) {
+ /* Interpolate the src data. */
+ v0 = read_vertex(loop_start + 0);
+ v1 = read_vertex(loop_start + 1);
+ v2 = read_vertex(loop_start + 2);
+ v3 = read_vertex(loop_start + 3);
+ }
+ else {
+ /* Interpolate the src data for the center. */
+    uint loop_end = loop_start + number_of_vertices;
+ Vertex center_value;
+ clear(center_value);
+
+ float weight = 1.0 / float(number_of_vertices);
+
+ for (uint l = loop_start; l < loop_end; l++) {
+ add_with_weight(center_value, read_vertex(l), weight);
+ }
+
+    /* Average the current corner with the previous and next ones to get the mid-edge values. */
+ uint patch_index = uint(patch_coords[start_loop_index].patch_index);
+ uint current_coarse_corner = get_polygon_corner_index(coarse_polygon, patch_index);
+ uint next_coarse_corner = (current_coarse_corner + 1) % number_of_vertices;
+ uint prev_coarse_corner = (current_coarse_corner + number_of_vertices - 1) %
+ number_of_vertices;
+
+    v0 = read_vertex(loop_start + current_coarse_corner);
+    v1 = average(v0, read_vertex(loop_start + next_coarse_corner));
+    v3 = average(v0, read_vertex(loop_start + prev_coarse_corner));
+
+    /* The remaining corner is the face center. */
+ v2 = center_value;
+ }
+
+ /* Do a linear interpolation of the data based on the UVs for each loop of this subdivided quad.
+ */
+ for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+ BlenderPatchCoord co = patch_coords[loop_index];
+ vec2 uv = decode_uv(co.encoded_uv);
+ /* NOTE: v2 and v3 are reversed to stay consistent with the interpolation weight on the x-axis:
+ *
+ * v3 +-----+ v2
+ * | |
+ * | |
+ * v0 +-----+ v1
+ *
+ * otherwise, weight would be `1.0 - uv.x` for `v2 <-> v3`, but `uv.x` for `v0 <-> v1`.
+ */
+ Vertex result = interp_vertex(v0, v1, v3, v2, uv);
+ write_vertex(loop_index, result);
+ }
+}
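The `GPU_FETCH_U16_TO_FLOAT` path above stores four components as two uints, each holding a pair of 16-bit normalized values. A CPU-side sketch in C of the same pack/unpack round-trip (the helper names are illustrative, not part of the patch):

```c
#include <assert.h>
#include <stdint.h>

/* Pack two normalized floats into one uint, high 16 bits first, mirroring the
 * shader's `write_vertex` encoding. */
static uint32_t pack_unorm16_pair(float hi, float lo)
{
  uint32_t a = (uint32_t)(hi * 65535.0f);
  uint32_t b = (uint32_t)(lo * 65535.0f);
  return (a << 16) | b;
}

/* Unpack back to floats, mirroring the shader's `read_vertex` decoding. */
static void unpack_unorm16_pair(uint32_t packed, float *hi, float *lo)
{
  *hi = (float)((packed >> 16) & 0xffff) / 65535.0f;
  *lo = (float)(packed & 0xffff) / 65535.0f;
}
```

The round-trip is exact for 0.0 and 1.0 and quantized to 1/65535 steps otherwise.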
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
new file mode 100644
index 00000000000..f11c0f6427e
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
@@ -0,0 +1,57 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputEdgeOrigIndex
+{
+ int input_origindex[];
+};
+
+layout(std430, binding = 1) writeonly buffer outputLinesIndices
+{
+ uint output_lines[];
+};
+
+#ifndef LINES_LOOSE
+void emit_line(uint line_offset, uint start_loop_index, uint corner_index)
+{
+ uint vertex_index = start_loop_index + corner_index;
+
+ if (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display) {
+ output_lines[line_offset + 0] = 0xffffffff;
+ output_lines[line_offset + 1] = 0xffffffff;
+ }
+ else {
+ /* Mod 4 so we loop back at the first vertex on the last loop index (3). */
+ uint next_vertex_index = start_loop_index + (corner_index + 1) % 4;
+
+ output_lines[line_offset + 0] = vertex_index;
+ output_lines[line_offset + 1] = next_vertex_index;
+ }
+}
+#endif
+
+void main()
+{
+ uint index = get_global_invocation_index();
+ if (index >= total_dispatch_size) {
+ return;
+ }
+
+#ifdef LINES_LOOSE
+ /* In the loose lines case, we execute for each line, with two vertices per line. */
+ uint line_offset = edge_loose_offset + index * 2;
+ uint loop_index = num_subdiv_loops + index * 2;
+ output_lines[line_offset] = loop_index;
+ output_lines[line_offset + 1] = loop_index + 1;
+#else
+ /* We execute for each quad, so the start index of the loop is quad_index * 4. */
+ uint start_loop_index = index * 4;
+ /* We execute for each quad, so the start index of the line is quad_index * 8 (with 2 vertices
+ * per line). */
+ uint start_line_index = index * 8;
+
+ for (int i = 0; i < 4; i++) {
+ emit_line(start_line_index + i * 2, start_loop_index, i);
+ }
+#endif
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
new file mode 100644
index 00000000000..3257ebdae17
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
@@ -0,0 +1,43 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Generate triangles from subdivision quads indices. */
+
+layout(std430, binding = 1) writeonly buffer outputTriangles
+{
+ uint output_tris[];
+};
+
+#ifndef SINGLE_MATERIAL
+layout(std430, binding = 2) readonly buffer inputPolygonMatOffset
+{
+ int polygon_mat_offset[];
+};
+#endif
+
+void main()
+{
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint loop_index = quad_index * 4;
+
+#ifdef SINGLE_MATERIAL
+ uint triangle_loop_index = quad_index * 6;
+#else
+ uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+ coarse_poly_count);
+ int mat_offset = polygon_mat_offset[coarse_quad_index];
+
+ int triangle_loop_index = (int(quad_index) + mat_offset) * 6;
+#endif
+
+ output_tris[triangle_loop_index + 0] = loop_index + 0;
+ output_tris[triangle_loop_index + 1] = loop_index + 1;
+ output_tris[triangle_loop_index + 2] = loop_index + 2;
+ output_tris[triangle_loop_index + 3] = loop_index + 0;
+ output_tris[triangle_loop_index + 4] = loop_index + 2;
+ output_tris[triangle_loop_index + 5] = loop_index + 3;
+}
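The quad-to-triangle index layout above can be sketched on the CPU as follows (the helper name is illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* Emit the two triangles (0,1,2) and (0,2,3) for one subdivision quad,
 * mirroring the index layout written by the compute shader above. */
static void quad_to_tris(uint32_t quad_index, uint32_t out_tris[6])
{
  uint32_t loop_index = quad_index * 4;
  out_tris[0] = loop_index + 0;
  out_tris[1] = loop_index + 1;
  out_tris[2] = loop_index + 2;
  out_tris[3] = loop_index + 0;
  out_tris[4] = loop_index + 2;
  out_tris[5] = loop_index + 3;
}
```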
diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
new file mode 100644
index 00000000000..005561964b8
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
@@ -0,0 +1,176 @@
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+/* Uniform block for #DRWSubdivUboStorage. */
+layout(std140) uniform shader_data
+{
+ /* Offsets in the buffers data where the source and destination data start. */
+ int src_offset;
+ int dst_offset;
+
+ /* Parameters for the DRWPatchMap. */
+ int min_patch_face;
+ int max_patch_face;
+ int max_depth;
+ int patches_are_triangular;
+
+ /* Coarse topology information. */
+ int coarse_poly_count;
+ uint edge_loose_offset;
+
+ /* Subdiv topology information. */
+ uint num_subdiv_loops;
+
+ /* Subdivision settings. */
+ bool optimal_display;
+
+ /* Sculpt data. */
+ bool has_sculpt_mask;
+
+ /* Masks for the extra coarse face data. */
+ uint coarse_face_select_mask;
+ uint coarse_face_smooth_mask;
+ uint coarse_face_active_mask;
+ uint coarse_face_loopstart_mask;
+
+ /* Total number of elements to process. */
+ uint total_dispatch_size;
+};
+
+uint get_global_invocation_index()
+{
+ uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
+ return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
+}
+
+/* Structure for #CompressedPatchCoord. */
+struct BlenderPatchCoord {
+ int patch_index;
+ uint encoded_uv;
+};
+
+vec2 decode_uv(uint encoded_uv)
+{
+ float u = float((encoded_uv >> 16) & 0xFFFFu) / 65535.0;
+ float v = float(encoded_uv & 0xFFFFu) / 65535.0;
+ return vec2(u, v);
+}
+
+/* This structure is a carbon copy of OpenSubDiv's PatchTable::PatchHandle. */
+struct PatchHandle {
+ int array_index;
+ int patch_index;
+ int vertex_index;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchCoord. */
+struct PatchCoord {
+ int array_index;
+ int patch_index;
+ int vertex_index;
+ float u;
+ float v;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchMap::QuadNode.
+ * Each child is a bitfield. */
+struct QuadNode {
+ uvec4 child;
+};
+
+bool is_set(uint i)
+{
+ /* QuadNode.Child.isSet is the first bit of the bitfield. */
+ return (i & 0x1u) != 0;
+}
+
+bool is_leaf(uint i)
+{
+ /* QuadNode.Child.isLeaf is the second bit of the bitfield. */
+ return (i & 0x2u) != 0;
+}
+
+uint get_index(uint i)
+{
+ /* QuadNode.Child.index is made of the remaining bits. */
+ return (i >> 2) & 0x3FFFFFFFu;
+}
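The bitfield decoding above can be exercised with a CPU-side sketch in C; `child_encode` is a hypothetical helper added only to test the layout, not something in the patch:

```c
#include <assert.h>
#include <stdint.h>

/* Decode one child of OpenSubDiv's PatchMap quadtree node: bit 0 is the
 * "is set" flag, bit 1 the "is leaf" flag, the remaining 30 bits the index. */
static int child_is_set(uint32_t c) { return (c & 0x1u) != 0; }
static int child_is_leaf(uint32_t c) { return (c & 0x2u) != 0; }
static uint32_t child_index(uint32_t c) { return (c >> 2) & 0x3FFFFFFFu; }

/* Encode a child value (hypothetical helper, for testing the layout only). */
static uint32_t child_encode(uint32_t index, int is_set, int is_leaf)
{
  return (index << 2) | (uint32_t)(is_leaf ? 2 : 0) | (uint32_t)(is_set ? 1 : 0);
}
```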
+
+/* Duplicate of #PosNorLoop from the mesh extract CPU code.
+ * We do not use a vec3 for the position as it will be padded to a vec4 which is incompatible with
+ * the format. */
+struct PosNorLoop {
+ float x, y, z;
+  /* TODO(kevindietrich): figure out how to compress the normals properly, as GLSL does not have
+   * char/short types and bit operations get tricky. */
+ float nx, ny, nz;
+ float flag;
+};
+
+vec3 get_vertex_pos(PosNorLoop vertex_data)
+{
+ return vec3(vertex_data.x, vertex_data.y, vertex_data.z);
+}
+
+vec3 get_vertex_nor(PosNorLoop vertex_data)
+{
+ return vec3(vertex_data.nx, vertex_data.ny, vertex_data.nz);
+}
+
+void set_vertex_pos(inout PosNorLoop vertex_data, vec3 pos)
+{
+ vertex_data.x = pos.x;
+ vertex_data.y = pos.y;
+ vertex_data.z = pos.z;
+}
+
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor, uint flag)
+{
+ vertex_data.nx = nor.x;
+ vertex_data.ny = nor.y;
+ vertex_data.nz = nor.z;
+ vertex_data.flag = float(flag);
+}
+
+/* Set the vertex normal but preserve the existing flag. This is for when we manually compute the
+ * vertex normals because the limit surface cannot be used, in which case the flag and the normal
+ * are set by two separate compute passes. */
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor)
+{
+ set_vertex_nor(vertex_data, nor, 0);
+}
+
+#define ORIGINDEX_NONE -1
+
+#ifdef SUBDIV_POLYGON_OFFSET
+layout(std430, binding = 0) readonly buffer inputSubdivPolygonOffset
+{
+ uint subdiv_polygon_offset[];
+};
+
+/* Given the index of a subdivision quad, return the index of the corresponding coarse polygon.
+ * This uses subdiv_polygon_offset; since it is a monotonically increasing list of offsets, we can
+ * use binary search to locate the right index. */
+uint coarse_polygon_index_from_subdiv_quad_index(uint subdiv_quad_index, uint coarse_poly_count)
+{
+ uint first = 0;
+ uint last = coarse_poly_count;
+
+ while (first != last) {
+ uint middle = (first + last) / 2;
+
+ if (subdiv_polygon_offset[middle] < subdiv_quad_index) {
+ first = middle + 1;
+ }
+ else {
+ last = middle;
+ }
+ }
+
+ if (subdiv_polygon_offset[first] == subdiv_quad_index) {
+ return first;
+ }
+
+ return first - 1;
+}
+#endif
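The binary search above can be mirrored on the CPU; a sketch in C, assuming one offset entry per coarse polygon holding the index of its first subdivision quad (the helper name is illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* CPU mirror of `coarse_polygon_index_from_subdiv_quad_index`: binary-search
 * the growing offset list for the coarse polygon owning a subdivision quad. */
static uint32_t coarse_index_from_quad(const uint32_t *offsets,
                                       uint32_t coarse_poly_count,
                                       uint32_t subdiv_quad_index)
{
  uint32_t first = 0, last = coarse_poly_count;
  while (first != last) {
    uint32_t middle = (first + last) / 2;
    if (offsets[middle] < subdiv_quad_index) {
      first = middle + 1;
    }
    else {
      last = middle;
    }
  }
  /* Exact hit on a polygon's first quad, otherwise step back one polygon. */
  if (offsets[first] == subdiv_quad_index) {
    return first;
  }
  return first - 1;
}
```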
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
new file mode 100644
index 00000000000..575090472b1
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
@@ -0,0 +1,56 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[];
+};
+
+layout(std430, binding = 1) readonly buffer faceAdjacencyOffsets
+{
+ uint face_adjacency_offsets[];
+};
+
+layout(std430, binding = 2) readonly buffer faceAdjacencyLists
+{
+ uint face_adjacency_lists[];
+};
+
+layout(std430, binding = 3) writeonly buffer vertexNormals
+{
+ vec3 normals[];
+};
+
+void main()
+{
+ uint vertex_index = get_global_invocation_index();
+ if (vertex_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint first_adjacent_face_offset = face_adjacency_offsets[vertex_index];
+ uint number_of_adjacent_faces = face_adjacency_offsets[vertex_index + 1] -
+ first_adjacent_face_offset;
+
+ vec3 accumulated_normal = vec3(0.0);
+
+ /* For each adjacent face. */
+ for (uint i = 0; i < number_of_adjacent_faces; i++) {
+ uint adjacent_face = face_adjacency_lists[first_adjacent_face_offset + i];
+ uint start_loop_index = adjacent_face * 4;
+
+ /* Compute face normal. */
+ vec3 adjacent_verts[3];
+ for (uint j = 0; j < 3; j++) {
+ adjacent_verts[j] = get_vertex_pos(pos_nor[start_loop_index + j]);
+ }
+
+ vec3 face_normal = normalize(
+ cross(adjacent_verts[1] - adjacent_verts[0], adjacent_verts[2] - adjacent_verts[0]));
+ accumulated_normal += face_normal;
+ }
+
+  normals[vertex_index] = normalize(accumulated_normal);
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
new file mode 100644
index 00000000000..84cd65d4161
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
@@ -0,0 +1,34 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputNormals
+{
+ vec3 vertex_normals[];
+};
+
+layout(std430, binding = 1) readonly buffer inputSubdivVertLoopMap
+{
+ uint vert_loop_map[];
+};
+
+layout(std430, binding = 2) buffer outputPosNor
+{
+ PosNorLoop pos_nor[];
+};
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ for (int i = 0; i < 4; i++) {
+ uint subdiv_vert_index = vert_loop_map[start_loop_index + i];
+ vec3 nor = vertex_normals[subdiv_vert_index];
+ set_vertex_nor(pos_nor[start_loop_index + i], nor);
+ }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
new file mode 100644
index 00000000000..5dd7decf663
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
@@ -0,0 +1,416 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Source buffer. */
+layout(std430, binding = 0) buffer src_buffer
+{
+ float srcVertexBuffer[];
+};
+
+/* #DRWPatchMap */
+layout(std430, binding = 1) readonly buffer inputPatchHandles
+{
+ PatchHandle input_patch_handles[];
+};
+
+layout(std430, binding = 2) readonly buffer inputQuadNodes
+{
+ QuadNode quad_nodes[];
+};
+
+layout(std430, binding = 3) readonly buffer inputPatchCoords
+{
+ BlenderPatchCoord patch_coords[];
+};
+
+layout(std430, binding = 4) readonly buffer inputVertOrigIndices
+{
+ int input_vert_origindex[];
+};
+
+/* Patch buffers. */
+layout(std430, binding = 5) buffer patchArray_buffer
+{
+ OsdPatchArray patchArrayBuffer[];
+};
+
+layout(std430, binding = 6) buffer patchIndex_buffer
+{
+ int patchIndexBuffer[];
+};
+
+layout(std430, binding = 7) buffer patchParam_buffer
+{
+ OsdPatchParam patchParamBuffer[];
+};
+
+/* Output buffer(s). */
+
+#if defined(FVAR_EVALUATION)
+layout(std430, binding = 8) writeonly buffer outputFVarData
+{
+ vec2 output_fvar[];
+};
+#elif defined(FDOTS_EVALUATION)
+/* For face dots, we build the position, normal, and index buffers in one go. */
+
+/* vec3 is padded to vec4, but the format used for fdots does not have any padding. */
+struct FDotVert {
+ float x, y, z;
+};
+
+/* Same here, do not use vec3. */
+struct FDotNor {
+ float x, y, z;
+ float flag;
+};
+
+layout(std430, binding = 8) writeonly buffer outputVertices
+{
+ FDotVert output_verts[];
+};
+
+layout(std430, binding = 9) writeonly buffer outputNormals
+{
+ FDotNor output_nors[];
+};
+
+layout(std430, binding = 10) writeonly buffer outputFdotsIndices
+{
+ uint output_indices[];
+};
+
+layout(std430, binding = 11) readonly buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[];
+};
+#else
+layout(std430, binding = 8) writeonly buffer outputVertexData
+{
+ PosNorLoop output_verts[];
+};
+#endif
+
+vec2 read_vec2(int index)
+{
+ vec2 result;
+ result.x = srcVertexBuffer[index * 2];
+ result.y = srcVertexBuffer[index * 2 + 1];
+ return result;
+}
+
+vec3 read_vec3(int index)
+{
+ vec3 result;
+ result.x = srcVertexBuffer[index * 3];
+ result.y = srcVertexBuffer[index * 3 + 1];
+ result.z = srcVertexBuffer[index * 3 + 2];
+ return result;
+}
+
+OsdPatchArray GetPatchArray(int arrayIndex)
+{
+ return patchArrayBuffer[arrayIndex];
+}
+
+OsdPatchParam GetPatchParam(int patchIndex)
+{
+ return patchParamBuffer[patchIndex];
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch Coordinate lookup. Return an OsdPatchCoord for the given patch_index and uvs.
+ * This code is a port of the OpenSubdiv PatchMap lookup code.
+ */
+
+PatchHandle bogus_patch_handle()
+{
+ PatchHandle ret;
+ ret.array_index = -1;
+ ret.vertex_index = -1;
+ ret.patch_index = -1;
+ return ret;
+}
+
+int transformUVToQuadQuadrant(float median, inout float u, inout float v)
+{
+ int uHalf = (u >= median) ? 1 : 0;
+ if (uHalf != 0)
+ u -= median;
+
+ int vHalf = (v >= median) ? 1 : 0;
+ if (vHalf != 0)
+ v -= median;
+
+ return (vHalf << 1) | uHalf;
+}
+
+int transformUVToTriQuadrant(float median, inout float u, inout float v, inout bool rotated)
+{
+
+ if (!rotated) {
+ if (u >= median) {
+ u -= median;
+ return 1;
+ }
+ if (v >= median) {
+ v -= median;
+ return 2;
+ }
+ if ((u + v) >= median) {
+ rotated = true;
+ return 3;
+ }
+ return 0;
+ }
+ else {
+ if (u < median) {
+ v -= median;
+ return 1;
+ }
+ if (v < median) {
+ u -= median;
+ return 2;
+ }
+ u -= median;
+ v -= median;
+ if ((u + v) < median) {
+ rotated = false;
+ return 3;
+ }
+ return 0;
+ }
+}
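The quadrant transform can be exercised on the CPU; a C sketch of the quad case, where the returned quadrant selects the child node and (u, v) are remapped into it (the helper name is illustrative):

```c
#include <assert.h>

/* CPU mirror of the shader's `transformUVToQuadQuadrant`: select the child
 * quadrant for (u, v) at the given median and remap both coordinates into it.
 * Quadrants are numbered (v_half << 1) | u_half, i.e. 0..3. */
static int uv_to_quad_quadrant(float median, float *u, float *v)
{
  int u_half = (*u >= median) ? 1 : 0;
  if (u_half) {
    *u -= median;
  }
  int v_half = (*v >= median) ? 1 : 0;
  if (v_half) {
    *v -= median;
  }
  return (v_half << 1) | u_half;
}
```

At each level of the quadtree walk the median is halved, so the coordinates stay local to the current node.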
+
+PatchHandle find_patch(int face_index, float u, float v)
+{
+ if (face_index < min_patch_face || face_index > max_patch_face) {
+ return bogus_patch_handle();
+ }
+
+ QuadNode node = quad_nodes[face_index - min_patch_face];
+
+ if (!is_set(node.child[0])) {
+ return bogus_patch_handle();
+ }
+
+ float median = 0.5;
+ bool tri_rotated = false;
+
+ for (int depth = 0; depth <= max_depth; ++depth, median *= 0.5) {
+ int quadrant = (patches_are_triangular != 0) ?
+ transformUVToTriQuadrant(median, u, v, tri_rotated) :
+ transformUVToQuadQuadrant(median, u, v);
+
+ if (is_leaf(node.child[quadrant])) {
+ return input_patch_handles[get_index(node.child[quadrant])];
+ }
+
+ node = quad_nodes[get_index(node.child[quadrant])];
+ }
+}
+
+OsdPatchCoord bogus_patch_coord(int face_index, float u, float v)
+{
+ OsdPatchCoord coord;
+ coord.arrayIndex = 0;
+ coord.patchIndex = face_index;
+ coord.vertIndex = 0;
+ coord.s = u;
+ coord.t = v;
+ return coord;
+}
+
+OsdPatchCoord GetPatchCoord(int face_index, float u, float v)
+{
+ PatchHandle patch_handle = find_patch(face_index, u, v);
+
+ if (patch_handle.array_index == -1) {
+ return bogus_patch_coord(face_index, u, v);
+ }
+
+ OsdPatchCoord coord;
+ coord.arrayIndex = patch_handle.array_index;
+ coord.patchIndex = patch_handle.patch_index;
+ coord.vertIndex = patch_handle.vertex_index;
+ coord.s = u;
+ coord.t = v;
+ return coord;
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch evaluation. Note that the 1st and 2nd derivatives are always computed, although we
+ * only return and use the 1st derivatives if adaptive patches are used. This could
+ * perhaps be optimized.
+ */
+
+#if defined(FVAR_EVALUATION)
+void evaluate_patches_limits(int patch_index, float u, float v, inout vec2 dst)
+{
+ OsdPatchCoord coord = GetPatchCoord(patch_index, u, v);
+ OsdPatchArray array = GetPatchArray(coord.arrayIndex);
+ OsdPatchParam param = GetPatchParam(coord.patchIndex);
+
+ int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc;
+
+ float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+ int nPoints = OsdEvaluatePatchBasis(
+ patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+ int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase);
+
+ for (int cv = 0; cv < nPoints; ++cv) {
+ int index = patchIndexBuffer[indexBase + cv];
+ vec2 src_fvar = read_vec2(src_offset + index);
+ dst += src_fvar * wP[cv];
+ }
+}
+#else
+void evaluate_patches_limits(
+ int patch_index, float u, float v, inout vec3 dst, inout vec3 du, inout vec3 dv)
+{
+ OsdPatchCoord coord = GetPatchCoord(patch_index, u, v);
+ OsdPatchArray array = GetPatchArray(coord.arrayIndex);
+ OsdPatchParam param = GetPatchParam(coord.patchIndex);
+
+ int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc;
+
+ float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+ int nPoints = OsdEvaluatePatchBasis(
+ patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+ int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase);
+
+ for (int cv = 0; cv < nPoints; ++cv) {
+ int index = patchIndexBuffer[indexBase + cv];
+ vec3 src_vertex = read_vec3(index);
+
+ dst += src_vertex * wP[cv];
+ du += src_vertex * wDu[cv];
+ dv += src_vertex * wDv[cv];
+ }
+}
+#endif
+
+/* ------------------------------------------------------------------------------
+ * Entry point.
+ */
+
+#if defined(FVAR_EVALUATION)
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+ vec2 fvar = vec2(0.0);
+
+ BlenderPatchCoord patch_co = patch_coords[loop_index];
+ vec2 uv = decode_uv(patch_co.encoded_uv);
+
+ evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, fvar);
+ output_fvar[dst_offset + loop_index] = fvar;
+ }
+}
+#elif defined(FDOTS_EVALUATION)
+bool is_face_selected(uint coarse_quad_index)
+{
+ return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0;
+}
+
+bool is_face_active(uint coarse_quad_index)
+{
+ return (extra_coarse_face_data[coarse_quad_index] & coarse_face_active_mask) != 0;
+}
+
+float get_face_flag(uint coarse_quad_index)
+{
+ if (is_face_active(coarse_quad_index)) {
+ return -1.0;
+ }
+
+ if (is_face_selected(coarse_quad_index)) {
+ return 1.0;
+ }
+
+ return 0.0;
+}
+
+void main()
+{
+ /* We execute for each coarse quad. */
+ uint coarse_quad_index = get_global_invocation_index();
+ if (coarse_quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ BlenderPatchCoord patch_co = patch_coords[coarse_quad_index];
+ vec2 uv = decode_uv(patch_co.encoded_uv);
+
+ vec3 pos = vec3(0.0);
+ vec3 du = vec3(0.0);
+ vec3 dv = vec3(0.0);
+ evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv);
+ vec3 nor = normalize(cross(du, dv));
+
+ FDotVert vert;
+ vert.x = pos.x;
+ vert.y = pos.y;
+ vert.z = pos.z;
+
+ FDotNor fnor;
+ fnor.x = nor.x;
+ fnor.y = nor.y;
+ fnor.z = nor.z;
+ fnor.flag = get_face_flag(coarse_quad_index);
+
+ output_verts[coarse_quad_index] = vert;
+ output_nors[coarse_quad_index] = fnor;
+ output_indices[coarse_quad_index] = coarse_quad_index;
+}
+#else
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+ vec3 pos = vec3(0.0);
+ vec3 du = vec3(0.0);
+ vec3 dv = vec3(0.0);
+
+ BlenderPatchCoord patch_co = patch_coords[loop_index];
+ vec2 uv = decode_uv(patch_co.encoded_uv);
+
+ evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv);
+
+# if defined(LIMIT_NORMALS)
+ vec3 nor = normalize(cross(du, dv));
+# else
+ /* This will be computed later. */
+ vec3 nor = vec3(0.0);
+# endif
+
+ int origindex = input_vert_origindex[loop_index];
+ uint flag = 0;
+ if (origindex == -1) {
+ flag = -1;
+ }
+
+ PosNorLoop vertex_data;
+ set_vertex_pos(vertex_data, pos);
+ set_vertex_nor(vertex_data, nor, flag);
+ output_verts[loop_index] = vertex_data;
+ }
+}
+#endif
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
new file mode 100644
index 00000000000..6c76cd41ca4
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
@@ -0,0 +1,97 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[];
+};
+
+layout(std430, binding = 1) readonly buffer inputEdgeIndex
+{
+ uint input_edge_index[];
+};
+
+layout(std430, binding = 2) writeonly buffer outputEdgeFactors
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+ float output_edge_fac[];
+#else
+ uint output_edge_fac[];
+#endif
+};
+
+void write_vec4(uint index, vec4 edge_facs)
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+ for (uint i = 0; i < 4; i++) {
+ output_edge_fac[index + i] = edge_facs[i];
+ }
+#else
+ /* Use same scaling as in extract_edge_fac_iter_poly_mesh. */
+ uint a = uint(clamp(edge_facs.x * 253.0 + 1.0, 0.0, 255.0));
+ uint b = uint(clamp(edge_facs.y * 253.0 + 1.0, 0.0, 255.0));
+ uint c = uint(clamp(edge_facs.z * 253.0 + 1.0, 0.0, 255.0));
+ uint d = uint(clamp(edge_facs.w * 253.0 + 1.0, 0.0, 255.0));
+ uint packed_edge_fac = a << 24 | b << 16 | c << 8 | d;
+ output_edge_fac[index] = packed_edge_fac;
+#endif
+}
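The byte packing above can be mirrored on the CPU; a C sketch of the non-AMD path, packing four edge factors in [0, 1] into one uint with the first factor in the high byte (the helper name is illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* CPU mirror of the shader's `write_vec4` packing: scale each factor with the
 * same 253 * x + 1 mapping as extract_edge_fac_iter_poly_mesh, clamp to a
 * byte, and pack as a << 24 | b << 16 | c << 8 | d. */
static uint32_t pack_edge_factors(const float f[4])
{
  uint32_t packed = 0;
  for (int i = 0; i < 4; i++) {
    float scaled = f[i] * 253.0f + 1.0f;
    if (scaled < 0.0f) {
      scaled = 0.0f;
    }
    if (scaled > 255.0f) {
      scaled = 255.0f;
    }
    packed = (packed << 8) | (uint32_t)scaled;
  }
  return packed;
}
```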
+
+/* From extract_mesh_vbo_edge_fac.cc, keep in sync! */
+float loop_edge_factor_get(vec3 f_no, vec3 v_co, vec3 v_no, vec3 v_next_co)
+{
+ vec3 evec = v_next_co - v_co;
+ vec3 enor = normalize(cross(v_no, evec));
+ float d = abs(dot(enor, f_no));
+ /* Re-scale to the slider range. */
+ d *= (1.0 / 0.065);
+ return clamp(d, 0.0, 1.0);
+}
+
+float compute_line_factor(uint start_loop_index, uint corner_index, vec3 face_normal)
+{
+ uint vertex_index = start_loop_index + corner_index;
+ uint edge_index = input_edge_index[vertex_index];
+
+ if (edge_index == -1 && optimal_display) {
+ return 0.0;
+ }
+
+ /* Mod 4 so we loop back at the first vertex on the last loop index (3), but only the corner
+ * index needs to be wrapped. */
+ uint next_vertex_index = start_loop_index + (corner_index + 1) % 4;
+ vec3 vertex_pos = get_vertex_pos(pos_nor[vertex_index]);
+ vec3 vertex_nor = get_vertex_nor(pos_nor[vertex_index]);
+ vec3 next_vertex_pos = get_vertex_pos(pos_nor[next_vertex_index]);
+ return loop_edge_factor_get(face_normal, vertex_pos, vertex_nor, next_vertex_pos);
+}
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ /* The start index of the loop is quad_index * 4. */
+ uint start_loop_index = quad_index * 4;
+
+  /* First compute the face normal; we need it to compute the dihedral edge angle. */
+ vec3 v0 = get_vertex_pos(pos_nor[start_loop_index + 0]);
+ vec3 v1 = get_vertex_pos(pos_nor[start_loop_index + 1]);
+ vec3 v2 = get_vertex_pos(pos_nor[start_loop_index + 2]);
+ vec3 face_normal = normalize(cross(v1 - v0, v2 - v0));
+
+ vec4 edge_facs = vec4(0.0);
+ for (int i = 0; i < 4; i++) {
+ edge_facs[i] = compute_line_factor(start_loop_index, i, face_normal);
+ }
+
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+ write_vec4(start_loop_index, edge_facs);
+#else
+ /* When packed into bytes, the index is the same as for the quad. */
+ write_vec4(quad_index, edge_facs);
+#endif
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
new file mode 100644
index 00000000000..ea73b9482d3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
@@ -0,0 +1,80 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputVerts
+{
+ PosNorLoop pos_nor[];
+};
+
+layout(std430, binding = 1) readonly buffer inputUVs
+{
+ vec2 uvs[];
+};
+
+/* Mirror of #UVStretchAngle in the C++ code, but using floats until proper data compression
+ * is implemented for all subdivision data. */
+struct UVStretchAngle {
+ float angle;
+ float uv_angle0;
+ float uv_angle1;
+};
+
+layout(std430, binding = 2) writeonly buffer outputStretchAngles
+{
+ UVStretchAngle uv_stretches[];
+};
+
+#define M_PI 3.1415926535897932
+#define M_1_PI 0.31830988618379067154
+
+/* Adapted from BLI_math_vector.h */
+float angle_normalized_v3v3(vec3 v1, vec3 v2)
+{
+  /* This is the same as acos(dot_v3v3(v1, v2)), but more accurate. */
+ bool q = (dot(v1, v2) >= 0.0);
+ vec3 v = (q) ? (v1 - v2) : (v1 + v2);
+ float a = 2.0 * asin(length(v) / 2.0);
+ return (q) ? a : M_PI - a;
+}
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ for (uint i = 0; i < 4; i++) {
+ uint cur_loop_index = start_loop_index + i;
+ uint next_loop_index = start_loop_index + (i + 1) % 4;
+ uint prev_loop_index = start_loop_index + (i + 3) % 4;
+
+ /* Compute 2d edge vectors from UVs. */
+ vec2 cur_uv = uvs[src_offset + cur_loop_index];
+ vec2 next_uv = uvs[src_offset + next_loop_index];
+ vec2 prev_uv = uvs[src_offset + prev_loop_index];
+
+ vec2 norm_uv_edge0 = normalize(prev_uv - cur_uv);
+ vec2 norm_uv_edge1 = normalize(cur_uv - next_uv);
+
+ /* Compute 3d edge vectors from positions. */
+ vec3 cur_pos = get_vertex_pos(pos_nor[cur_loop_index]);
+ vec3 next_pos = get_vertex_pos(pos_nor[next_loop_index]);
+ vec3 prev_pos = get_vertex_pos(pos_nor[prev_loop_index]);
+
+ vec3 norm_pos_edge0 = normalize(prev_pos - cur_pos);
+ vec3 norm_pos_edge1 = normalize(cur_pos - next_pos);
+
+    /* Compute the stretches. This logic is adapted from #edituv_get_edituv_stretch_angle.
+     * Keep in sync! */
+ UVStretchAngle stretch;
+ stretch.uv_angle0 = atan(norm_uv_edge0.y, norm_uv_edge0.x) * M_1_PI;
+ stretch.uv_angle1 = atan(norm_uv_edge1.y, norm_uv_edge1.x) * M_1_PI;
+ stretch.angle = angle_normalized_v3v3(norm_pos_edge0, norm_pos_edge1) * M_1_PI;
+
+ uv_stretches[cur_loop_index] = stretch;
+ }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
new file mode 100644
index 00000000000..e897fb3f3c0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
@@ -0,0 +1,31 @@
+
+/* To be compiled with common_subdiv_lib.glsl. */
+
+layout(std430, binding = 1) readonly buffer inputCoarseData
+{
+ float coarse_stretch_area[];
+};
+
+layout(std430, binding = 2) writeonly buffer outputSubdivData
+{
+ float subdiv_stretch_area[];
+};
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ /* The start index of the loop is quad_index * 4. */
+ uint start_loop_index = quad_index * 4;
+
+ uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+ coarse_poly_count);
+
+ for (int i = 0; i < 4; i++) {
+ subdiv_stretch_area[start_loop_index + i] = coarse_stretch_area[coarse_quad_index];
+ }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
new file mode 100644
index 00000000000..41a8df3cf82
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
@@ -0,0 +1,52 @@
+
+/* To be compiled with common_subdiv_lib.glsl. */
+
+layout(std430, binding = 1) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[];
+};
+
+layout(std430, binding = 2) readonly buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[];
+};
+
+layout(std430, binding = 3) writeonly buffer outputLoopNormals
+{
+ vec3 output_lnor[];
+};
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ /* The start index of the loop is quad_index * 4. */
+ uint start_loop_index = quad_index * 4;
+
+ uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index,
+ coarse_poly_count);
+
+ if ((extra_coarse_face_data[coarse_quad_index] & coarse_face_smooth_mask) != 0) {
+ /* Face is smooth, use vertex normals. */
+ for (int i = 0; i < 4; i++) {
+ PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i];
+ output_lnor[start_loop_index + i] = get_vertex_nor(pos_nor_loop);
+ }
+ }
+ else {
+ /* Face is flat shaded, compute flat face normal from an inscribed triangle. */
+ vec3 verts[3];
+ for (int i = 0; i < 3; i++) {
+ verts[i] = get_vertex_pos(pos_nor[start_loop_index + i]);
+ }
+
+ vec3 face_normal = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+ for (int i = 0; i < 4; i++) {
+ output_lnor[start_loop_index + i] = face_normal;
+ }
+ }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
new file mode 100644
index 00000000000..7182ce57ad3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
@@ -0,0 +1,47 @@
+
+/* To be compiled with common_subdiv_lib.glsl. */
+
+struct SculptData {
+ uint face_set_color;
+ float mask;
+};
+
+layout(std430, binding = 0) readonly restrict buffer sculptMask
+{
+ float sculpt_mask[];
+};
+
+layout(std430, binding = 1) readonly restrict buffer faceSetColor
+{
+ uint face_set_color[];
+};
+
+layout(std430, binding = 2) writeonly restrict buffer sculptData
+{
+ SculptData sculpt_data[];
+};
+
+void main()
+{
+ /* We execute for each quad. */
+ uint quad_index = get_global_invocation_index();
+ if (quad_index >= total_dispatch_size) {
+ return;
+ }
+
+ uint start_loop_index = quad_index * 4;
+
+ for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+ SculptData data;
+ data.face_set_color = face_set_color[loop_index];
+
+ if (has_sculpt_mask) {
+ data.mask = sculpt_mask[loop_index];
+ }
+ else {
+ data.mask = 0.0;
+ }
+
+ sculpt_data[loop_index] = data;
+ }
+}
diff --git a/source/blender/editors/space_view3d/view3d_draw.c b/source/blender/editors/space_view3d/view3d_draw.c
index a7d170982ed..b1f19581543 100644
--- a/source/blender/editors/space_view3d/view3d_draw.c
+++ b/source/blender/editors/space_view3d/view3d_draw.c
@@ -1581,6 +1581,7 @@ void view3d_main_region_draw(const bContext *C, ARegion *region)
view3d_draw_view(C, region);
+ DRW_cache_free_old_subdiv();
DRW_cache_free_old_batches(bmain);
BKE_image_free_old_gputextures(bmain);
GPU_pass_cache_garbage_collect();
diff --git a/source/blender/editors/transform/transform_snap_object.c b/source/blender/editors/transform/transform_snap_object.c
index 350d3a2676c..e3a2d1f6531 100644
--- a/source/blender/editors/transform/transform_snap_object.c
+++ b/source/blender/editors/transform/transform_snap_object.c
@@ -146,7 +146,7 @@ struct SnapObjectContext {
* If NULL the BMesh should be used. */
static Mesh *mesh_for_snap(Object *ob_eval, eSnapEditType edit_mode_type, bool *r_use_hide)
{
- Mesh *me_eval = ob_eval->data;
+ Mesh *me_eval = BKE_object_get_evaluated_mesh(ob_eval);
bool use_hide = false;
if (BKE_object_is_in_editmode(ob_eval)) {
if (edit_mode_type == SNAP_GEOM_EDIT) {
diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h
index 5189fa1ae41..5e67441be27 100644
--- a/source/blender/gpu/GPU_context.h
+++ b/source/blender/gpu/GPU_context.h
@@ -40,6 +40,8 @@ typedef enum eGPUBackendType {
void GPU_backend_init(eGPUBackendType backend);
void GPU_backend_exit(void);
+eGPUBackendType GPU_backend_get_type(void);
+
/** Opaque type hiding blender::gpu::Context. */
typedef struct GPUContext GPUContext;
diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h
index e4f1709173e..0f83e590597 100644
--- a/source/blender/gpu/GPU_index_buffer.h
+++ b/source/blender/gpu/GPU_index_buffer.h
@@ -53,6 +53,8 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *, GPUPrimType, uint index_len, uin
void GPU_indexbuf_init(GPUIndexBufBuilder *, GPUPrimType, uint prim_len, uint vertex_len);
GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len);
+void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len);
+
/*
* Thread safe.
*
@@ -82,6 +84,16 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *);
void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding);
+/* Upload data to the GPU (if not built on the device) and bind the buffer to its default target.
+ */
+void GPU_indexbuf_use(GPUIndexBuf *elem);
+
+/* Partially update the GPUIndexBuf which was already sent to the device, or built directly on the
+ * device. The data must be compatible with any compression applied to the original indices when
+ * the index buffer was built, i.e., if the data was compressed to use shorts instead of ints,
+ * shorts should be passed here. */
+void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, uint len, const void *data);
+
/* Create a sub-range of an existing index-buffer. */
GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *elem_src, uint start, uint length);
void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem,
diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h
index 62a495abfb3..43a8e7fc4cb 100644
--- a/source/blender/gpu/GPU_vertex_buffer.h
+++ b/source/blender/gpu/GPU_vertex_buffer.h
@@ -91,6 +91,8 @@ void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts);
void GPU_vertbuf_init_with_format_ex(GPUVertBuf *, const GPUVertFormat *, GPUUsageType);
+void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len);
+
#define GPU_vertbuf_init_with_format(verts, format) \
GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_STATIC)
@@ -172,6 +174,7 @@ const GPUVertFormat *GPU_vertbuf_get_format(const GPUVertBuf *verts);
uint GPU_vertbuf_get_vertex_alloc(const GPUVertBuf *verts);
uint GPU_vertbuf_get_vertex_len(const GPUVertBuf *verts);
GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts);
+void GPU_vertbuf_tag_dirty(GPUVertBuf *verts);
/**
* Should be rename to #GPU_vertbuf_data_upload.
@@ -179,12 +182,14 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts);
void GPU_vertbuf_use(GPUVertBuf *);
void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding);
+void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle);
+
/**
* XXX: do not use!
* This is just a wrapper for the use of the Hair refine workaround.
* To be used with #GPU_vertbuf_use().
*/
-void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data);
+void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data);
/* Metrics */
uint GPU_vertbuf_get_memory_usage(void);
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index 5af15d1bc3d..98714269402 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -186,6 +186,15 @@ void GPU_backend_exit()
g_backend = nullptr;
}
+eGPUBackendType GPU_backend_get_type()
+{
+ if (g_backend && dynamic_cast<GLBackend *>(g_backend) != nullptr) {
+ return GPU_BACKEND_OPENGL;
+ }
+
+ return GPU_BACKEND_NONE;
+}
+
GPUBackend *GPUBackend::get()
{
return g_backend;
diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc
index 3472cc24a74..895b2a8461b 100644
--- a/source/blender/gpu/intern/gpu_index_buffer.cc
+++ b/source/blender/gpu/intern/gpu_index_buffer.cc
@@ -74,11 +74,16 @@ void GPU_indexbuf_init(GPUIndexBufBuilder *builder,
GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len)
{
GPUIndexBuf *elem_ = GPU_indexbuf_calloc();
- IndexBuf *elem = unwrap(elem_);
- elem->init_build_on_device(index_len);
+ GPU_indexbuf_init_build_on_device(elem_, index_len);
return elem_;
}
+void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len)
+{
+ IndexBuf *elem_ = unwrap(elem);
+ elem_->init_build_on_device(index_len);
+}
+
void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from)
{
BLI_assert(builder_to->data == builder_from->data);
@@ -410,9 +415,19 @@ int GPU_indexbuf_primitive_len(GPUPrimType prim_type)
return indices_per_primitive(prim_type);
}
+void GPU_indexbuf_use(GPUIndexBuf *elem)
+{
+ unwrap(elem)->upload_data();
+}
+
void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding)
{
unwrap(elem)->bind_as_ssbo(binding);
}
+void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, uint len, const void *data)
+{
+ unwrap(elem)->update_sub(start, len, data);
+}
+
/** \} */
diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh
index ed7dd830c8c..adc0145f867 100644
--- a/source/blender/gpu/intern/gpu_index_buffer_private.hh
+++ b/source/blender/gpu/intern/gpu_index_buffer_private.hh
@@ -92,11 +92,15 @@ class IndexBuf {
return is_init_;
};
+ virtual void upload_data(void) = 0;
+
virtual void bind_as_ssbo(uint binding) = 0;
virtual const uint32_t *read() const = 0;
uint32_t *unmap(const uint32_t *mapped_memory) const;
+ virtual void update_sub(uint start, uint len, const void *data) = 0;
+
private:
inline void squeeze_indices_short(uint min_idx, uint max_idx);
inline uint index_range(uint *r_min, uint *r_max);
diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc
index 5ed9648387f..dba31f501f2 100644
--- a/source/blender/gpu/intern/gpu_vertex_buffer.cc
+++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc
@@ -144,6 +144,12 @@ void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts_,
unwrap(verts_)->init(format, usage);
}
+void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len)
+{
+ GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_DEVICE_ONLY);
+ GPU_vertbuf_data_alloc(verts, v_len);
+}
+
GPUVertBuf *GPU_vertbuf_duplicate(GPUVertBuf *verts_)
{
return wrap(unwrap(verts_)->duplicate());
@@ -313,6 +319,11 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts)
return unwrap(verts)->flag;
}
+void GPU_vertbuf_tag_dirty(GPUVertBuf *verts)
+{
+ unwrap(verts)->flag |= GPU_VERTBUF_DATA_DIRTY;
+}
+
uint GPU_vertbuf_get_memory_usage()
{
return VertBuf::memory_usage;
@@ -323,12 +334,17 @@ void GPU_vertbuf_use(GPUVertBuf *verts)
unwrap(verts)->upload();
}
+void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle)
+{
+ unwrap(verts)->wrap_handle(handle);
+}
+
void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding)
{
unwrap(verts)->bind_as_ssbo(binding);
}
-void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data)
+void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data)
{
unwrap(verts)->update_sub(start, len, data);
}
diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh
index 9531c2c1a5f..2f46295f45a 100644
--- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh
+++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh
@@ -68,6 +68,8 @@ class VertBuf {
void upload(void);
virtual void bind_as_ssbo(uint binding) = 0;
+ virtual void wrap_handle(uint64_t handle) = 0;
+
VertBuf *duplicate(void);
/* Size of the data allocated. */
@@ -96,7 +98,7 @@ class VertBuf {
}
}
- virtual void update_sub(uint start, uint len, void *data) = 0;
+ virtual void update_sub(uint start, uint len, const void *data) = 0;
virtual const void *read() const = 0;
virtual void *unmap(const void *mapped_data) const = 0;
diff --git a/source/blender/gpu/opengl/gl_index_buffer.cc b/source/blender/gpu/opengl/gl_index_buffer.cc
index e305f765ad9..82bab460ae3 100644
--- a/source/blender/gpu/opengl/gl_index_buffer.cc
+++ b/source/blender/gpu/opengl/gl_index_buffer.cc
@@ -81,4 +81,14 @@ bool GLIndexBuf::is_active() const
return ibo_id_ == active_ibo_id;
}
+void GLIndexBuf::upload_data()
+{
+ bind();
+}
+
+void GLIndexBuf::update_sub(uint start, uint len, const void *data)
+{
+ glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, start, len, data);
+}
+
} // namespace blender::gpu
diff --git a/source/blender/gpu/opengl/gl_index_buffer.hh b/source/blender/gpu/opengl/gl_index_buffer.hh
index 0dbdaa6d398..85d52447bc6 100644
--- a/source/blender/gpu/opengl/gl_index_buffer.hh
+++ b/source/blender/gpu/opengl/gl_index_buffer.hh
@@ -61,6 +61,10 @@ class GLIndexBuf : public IndexBuf {
return (index_type_ == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu;
}
+ void upload_data(void) override;
+
+ void update_sub(uint start, uint len, const void *data) override;
+
private:
bool is_active() const;
diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.cc b/source/blender/gpu/opengl/gl_vertex_buffer.cc
index ce16a491528..469ac2cf8d6 100644
--- a/source/blender/gpu/opengl/gl_vertex_buffer.cc
+++ b/source/blender/gpu/opengl/gl_vertex_buffer.cc
@@ -49,6 +49,10 @@ void GLVertBuf::resize_data()
void GLVertBuf::release_data()
{
+ if (is_wrapper_) {
+ return;
+ }
+
if (vbo_id_ != 0) {
GLContext::buf_free(vbo_id_);
vbo_id_ = 0;
@@ -137,6 +141,16 @@ void *GLVertBuf::unmap(const void *mapped_data) const
return result;
}
+void GLVertBuf::wrap_handle(uint64_t handle)
+{
+ BLI_assert(vbo_id_ == 0);
+ BLI_assert(glIsBuffer(static_cast<uint>(handle)));
+ is_wrapper_ = true;
+ vbo_id_ = static_cast<uint>(handle);
+ /* We assume the data is already on the device, so no need to allocate or send it. */
+ flag = GPU_VERTBUF_DATA_UPLOADED;
+}
+
bool GLVertBuf::is_active() const
{
if (!vbo_id_) {
@@ -147,7 +161,7 @@ bool GLVertBuf::is_active() const
return vbo_id_ == active_vbo_id;
}
-void GLVertBuf::update_sub(uint start, uint len, void *data)
+void GLVertBuf::update_sub(uint start, uint len, const void *data)
{
glBufferSubData(GL_ARRAY_BUFFER, start, len, data);
}
diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.hh b/source/blender/gpu/opengl/gl_vertex_buffer.hh
index 6c38a2225b3..27e4cc4f8e2 100644
--- a/source/blender/gpu/opengl/gl_vertex_buffer.hh
+++ b/source/blender/gpu/opengl/gl_vertex_buffer.hh
@@ -39,17 +39,22 @@ class GLVertBuf : public VertBuf {
private:
/** OpenGL buffer handle. Init on first upload. Immutable after that. */
GLuint vbo_id_ = 0;
+ /** Defines whether the buffer handle is wrapped by this GLVertBuf, i.e. we do not own it and
+ * should not free it. */
+ bool is_wrapper_ = false;
/** Size on the GPU. */
size_t vbo_size_ = 0;
public:
void bind(void);
- void update_sub(uint start, uint len, void *data) override;
+ void update_sub(uint start, uint len, const void *data) override;
const void *read() const override;
void *unmap(const void *mapped_data) const override;
+ void wrap_handle(uint64_t handle) override;
+
protected:
void acquire_data(void) override;
void resize_data(void) override;
diff --git a/source/blender/makesdna/DNA_mesh_types.h b/source/blender/makesdna/DNA_mesh_types.h
index c053baf9f7e..94e88bdaca6 100644
--- a/source/blender/makesdna/DNA_mesh_types.h
+++ b/source/blender/makesdna/DNA_mesh_types.h
@@ -138,6 +138,15 @@ typedef struct Mesh_Runtime {
int64_t cd_dirty_loop;
int64_t cd_dirty_poly;
+ /**
+ * Settings for lazily evaluating the subdivision on the CPU if needed. These are
+ * set in the modifier when GPU subdivision can be performed.
+ */
+ char subsurf_apply_render;
+ char subsurf_use_optimal_display;
+ char _pad[2];
+ int subsurf_resolution;
+
} Mesh_Runtime;
typedef struct Mesh {
@@ -356,7 +365,8 @@ typedef enum eMeshWrapperType {
ME_WRAPPER_TYPE_MDATA = 0,
/** Use edit-mesh data (#Mesh.edit_mesh, #Mesh_Runtime.edit_data). */
ME_WRAPPER_TYPE_BMESH = 1,
- /* ME_WRAPPER_TYPE_SUBD = 2, */ /* TODO */
+ /** Use subdivision mesh data (#Mesh_Runtime.mesh_eval). */
+ ME_WRAPPER_TYPE_SUBD = 2,
} eMeshWrapperType;
/** #Mesh.texflag */
diff --git a/source/blender/makesdna/DNA_modifier_types.h b/source/blender/makesdna/DNA_modifier_types.h
index 85cc1361adf..fc041e257b0 100644
--- a/source/blender/makesdna/DNA_modifier_types.h
+++ b/source/blender/makesdna/DNA_modifier_types.h
@@ -196,6 +196,13 @@ typedef enum {
SUBSURF_BOUNDARY_SMOOTH_PRESERVE_CORNERS = 1,
} eSubsurfBoundarySmooth;
+typedef struct SubsurfRuntimeData {
+ /* Cached subdivision surface descriptor, with topology and settings. */
+ struct Subdiv *subdiv;
+ char set_by_draw_code;
+ char _pad[7];
+} SubsurfRuntimeData;
+
typedef struct SubsurfModifierData {
ModifierData modifier;
diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h
index c99651f0717..34415308ef6 100644
--- a/source/blender/makesdna/DNA_userdef_types.h
+++ b/source/blender/makesdna/DNA_userdef_types.h
@@ -1145,6 +1145,7 @@ typedef enum eUserpref_GPU_Flag {
USER_GPU_FLAG_NO_DEPT_PICK = (1 << 0),
USER_GPU_FLAG_NO_EDIT_MODE_SMOOTH_WIRE = (1 << 1),
USER_GPU_FLAG_OVERLAY_SMOOTH_WIRE = (1 << 2),
+ USER_GPU_FLAG_SUBDIVISION_EVALUATION = (1 << 3),
} eUserpref_GPU_Flag;
/** #UserDef.tablet_api */
diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c
index 929cf94615b..71c38311124 100644
--- a/source/blender/makesrna/intern/rna_userdef.c
+++ b/source/blender/makesrna/intern/rna_userdef.c
@@ -182,6 +182,7 @@ static const EnumPropertyItem rna_enum_userdef_viewport_aa_items[] = {
# include "BKE_image.h"
# include "BKE_main.h"
# include "BKE_mesh_runtime.h"
+# include "BKE_object.h"
# include "BKE_paint.h"
# include "BKE_pbvh.h"
# include "BKE_preferences.h"
@@ -578,6 +579,20 @@ static PointerRNA rna_UserDef_apps_get(PointerRNA *ptr)
return rna_pointer_inherit_refine(ptr, &RNA_PreferencesApps, ptr->data);
}
+/* Re-evaluate objects that have a subsurf modifier as the last in their modifier stack. */
+static void rna_UserDef_subdivision_update(Main *bmain, Scene *scene, PointerRNA *ptr)
+{
+ Object *ob;
+
+ for (ob = bmain->objects.first; ob; ob = ob->id.next) {
+ if (BKE_object_get_last_subsurf_modifier(ob) != NULL) {
+ DEG_id_tag_update(&ob->id, ID_RECALC_GEOMETRY);
+ }
+ }
+
+ rna_userdef_update(bmain, scene, ptr);
+}
+
static void rna_UserDef_audio_update(Main *bmain, Scene *UNUSED(scene), PointerRNA *UNUSED(ptr))
{
BKE_sound_init(bmain);
@@ -5651,6 +5666,16 @@ static void rna_def_userdef_system(BlenderRNA *brna)
"Use the depth buffer for picking 3D View selection "
"(without this the front most object may not be selected first)");
+ /* GPU subdivision evaluation. */
+
+ prop = RNA_def_property(srna, "use_gpu_subdivision", PROP_BOOLEAN, PROP_NONE);
+ RNA_def_property_boolean_sdna(prop, NULL, "gpu_flag", USER_GPU_FLAG_SUBDIVISION_EVALUATION);
+ RNA_def_property_ui_text(prop,
+ "GPU Subdivision",
+ "Enable GPU acceleration for evaluating the last subdivision surface "
+ "modifier in the stack");
+ RNA_def_property_update(prop, 0, "rna_UserDef_subdivision_update");
+
/* Audio */
prop = RNA_def_property(srna, "audio_mixing_buffer", PROP_ENUM, PROP_NONE);
diff --git a/source/blender/modifiers/intern/MOD_subsurf.c b/source/blender/modifiers/intern/MOD_subsurf.c
index 7470f2abb15..00870d076ef 100644
--- a/source/blender/modifiers/intern/MOD_subsurf.c
+++ b/source/blender/modifiers/intern/MOD_subsurf.c
@@ -39,6 +39,7 @@
#include "DNA_screen_types.h"
#include "BKE_context.h"
+#include "BKE_editmesh.h"
#include "BKE_mesh.h"
#include "BKE_scene.h"
#include "BKE_screen.h"
@@ -46,6 +47,7 @@
#include "BKE_subdiv_ccg.h"
#include "BKE_subdiv_deform.h"
#include "BKE_subdiv_mesh.h"
+#include "BKE_subdiv_modifier.h"
#include "BKE_subsurf.h"
#include "UI_interface.h"
@@ -65,11 +67,6 @@
#include "intern/CCGSubSurf.h"
-typedef struct SubsurfRuntimeData {
- /* Cached subdivision surface descriptor, with topology and settings. */
- struct Subdiv *subdiv;
-} SubsurfRuntimeData;
-
static void initData(ModifierData *md)
{
SubsurfModifierData *smd = (SubsurfModifierData *)md;
@@ -155,37 +152,6 @@ static int subdiv_levels_for_modifier_get(const SubsurfModifierData *smd,
return get_render_subsurf_level(&scene->r, requested_levels, use_render_params);
}
-static void subdiv_settings_init(SubdivSettings *settings,
- const SubsurfModifierData *smd,
- const ModifierEvalContext *ctx)
-{
- const bool use_render_params = (ctx->flag & MOD_APPLY_RENDER);
- const int requested_levels = (use_render_params) ? smd->renderLevels : smd->levels;
-
- settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE);
- settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision);
- settings->level = settings->is_simple ?
- 1 :
- (settings->is_adaptive ? smd->quality : requested_levels);
- settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease);
- settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf(
- smd->boundary_smooth);
- settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth(
- smd->uv_smooth);
-}
-
-/* Main goal of this function is to give usable subdivision surface descriptor
- * which matches settings and topology. */
-static Subdiv *subdiv_descriptor_ensure(SubsurfModifierData *smd,
- const SubdivSettings *subdiv_settings,
- const Mesh *mesh)
-{
- SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime;
- Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh);
- runtime_data->subdiv = subdiv;
- return subdiv;
-}
-
/* Subdivide into fully qualified mesh. */
static void subdiv_mesh_settings_init(SubdivToMeshSettings *settings,
@@ -240,14 +206,17 @@ static Mesh *subdiv_as_ccg(SubsurfModifierData *smd,
return result;
}
-static SubsurfRuntimeData *subsurf_ensure_runtime(SubsurfModifierData *smd)
+/* Cache settings for lazy CPU evaluation. */
+
+static void subdiv_cache_cpu_evaluation_settings(const ModifierEvalContext *ctx,
+ Mesh *me,
+ SubsurfModifierData *smd)
{
- SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime;
- if (runtime_data == NULL) {
- runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime");
- smd->modifier.runtime = runtime_data;
- }
- return runtime_data;
+ SubdivToMeshSettings mesh_settings;
+ subdiv_mesh_settings_init(&mesh_settings, smd, ctx);
+ me->runtime.subsurf_apply_render = (ctx->flag & MOD_APPLY_RENDER) != 0;
+ me->runtime.subsurf_resolution = mesh_settings.resolution;
+ me->runtime.subsurf_use_optimal_display = mesh_settings.use_optimal_display;
}
/* Modifier itself. */
@@ -261,12 +230,30 @@ static Mesh *modifyMesh(ModifierData *md, const ModifierEvalContext *ctx, Mesh *
#endif
SubsurfModifierData *smd = (SubsurfModifierData *)md;
SubdivSettings subdiv_settings;
- subdiv_settings_init(&subdiv_settings, smd, ctx);
+ BKE_subsurf_modifier_subdiv_settings_init(
+ &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0);
if (subdiv_settings.level == 0) {
return result;
}
- SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd);
- Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh);
+ SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd);
+
+ /* Delay evaluation to the draw code if possible, provided we do not have to apply the modifier.
+ */
+ if ((ctx->flag & MOD_APPLY_TO_BASE_MESH) == 0) {
+ Scene *scene = DEG_get_evaluated_scene(ctx->depsgraph);
+ const bool is_render_mode = (ctx->flag & MOD_APPLY_RENDER) != 0;
+ /* Same check as in `DRW_mesh_batch_cache_create_requested` to keep both code paths coherent. */
+ const bool is_editmode = (mesh->edit_mesh != NULL) &&
+ (mesh->edit_mesh->mesh_eval_final != NULL);
+ const int required_mode = BKE_subsurf_modifier_eval_required_mode(is_render_mode, is_editmode);
+ if (BKE_subsurf_modifier_can_do_gpu_subdiv_ex(scene, ctx->object, smd, required_mode, false)) {
+ subdiv_cache_cpu_evaluation_settings(ctx, mesh, smd);
+ return result;
+ }
+ }
+
+ Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(
+ smd, &subdiv_settings, mesh, false);
if (subdiv == NULL) {
/* Happens on bad topology, but also on empty input mesh. */
return result;
@@ -320,12 +307,14 @@ static void deformMatrices(ModifierData *md,
SubsurfModifierData *smd = (SubsurfModifierData *)md;
SubdivSettings subdiv_settings;
- subdiv_settings_init(&subdiv_settings, smd, ctx);
+ BKE_subsurf_modifier_subdiv_settings_init(
+ &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0);
if (subdiv_settings.level == 0) {
return;
}
- SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd);
- Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh);
+ SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd);
+ Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(
+ smd, &subdiv_settings, mesh, false);
if (subdiv == NULL) {
/* Happens on bad topology, but also on empty input mesh. */
return;
diff --git a/source/blender/windowmanager/intern/wm_init_exit.c b/source/blender/windowmanager/intern/wm_init_exit.c
index 2f87e5789fe..957ec7d800d 100644
--- a/source/blender/windowmanager/intern/wm_init_exit.c
+++ b/source/blender/windowmanager/intern/wm_init_exit.c
@@ -562,6 +562,13 @@ void WM_exit_ex(bContext *C, const bool do_python)
BKE_blender_free(); /* blender.c, does entire library and spacetypes */
// BKE_material_copybuf_free();
+
+ /* Free the GPU subdivision data after the database so that subdivision structs used by
+ * the modifiers have been garbage collected. */
+ if (opengl_is_init) {
+ DRW_subdiv_free();
+ }
+
ANIM_fcurves_copybuf_free();
ANIM_drivers_copybuf_free();
ANIM_driver_vars_copybuf_free();