diff options
Diffstat (limited to 'source/blender')
84 files changed, 5799 insertions, 200 deletions
diff --git a/source/blender/blenkernel/BKE_mesh_wrapper.h b/source/blender/blenkernel/BKE_mesh_wrapper.h index 2fe264fd0f7..12e8fd71503 100644 --- a/source/blender/blenkernel/BKE_mesh_wrapper.h +++ b/source/blender/blenkernel/BKE_mesh_wrapper.h @@ -22,6 +22,7 @@ struct BMEditMesh; struct CustomData_MeshMasks; struct Mesh; +struct Object; #ifdef __cplusplus extern "C" { @@ -51,6 +52,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const struct Mesh *me, int vert_coords_len, const float mat[4][4]); +struct Mesh *BKE_mesh_wrapper_ensure_subdivision(const struct Object *ob, struct Mesh *me); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenkernel/BKE_object.h b/source/blender/blenkernel/BKE_object.h index 03565bd3bda..a7d39598e54 100644 --- a/source/blender/blenkernel/BKE_object.h +++ b/source/blender/blenkernel/BKE_object.h @@ -48,6 +48,7 @@ struct RegionView3D; struct RigidBodyWorld; struct Scene; struct ShaderFxData; +struct SubsurfModifierData; struct View3D; struct ViewLayer; @@ -512,6 +513,7 @@ bool BKE_object_obdata_texspace_get(struct Object *ob, float **r_loc, float **r_size); +struct Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const struct Object *object); /** Get evaluated mesh for given object. */ struct Mesh *BKE_object_get_evaluated_mesh(const struct Object *object); /** @@ -712,6 +714,15 @@ void BKE_object_modifiers_lib_link_common(void *userData, struct ID **idpoin, int cb_flag); +/** + * Return the last subsurf modifier of an object, this does not check whether modifiers on top of + * it are disabled. Return NULL if no such modifier is found. + * + * This does not check if the modifier is enabled as it is assumed that the caller verified that it + * is enabled for its evaluation mode. 
+ */ +struct SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const struct Object *ob); + void BKE_object_replace_data_on_shallow_copy(struct Object *ob, struct ID *new_data); struct PartEff; diff --git a/source/blender/blenkernel/BKE_subdiv.h b/source/blender/blenkernel/BKE_subdiv.h index 2fb27fad30d..169a4337f6a 100644 --- a/source/blender/blenkernel/BKE_subdiv.h +++ b/source/blender/blenkernel/BKE_subdiv.h @@ -188,7 +188,16 @@ typedef struct Subdiv { /* Cached values, are not supposed to be accessed directly. */ struct { /* Indexed by base face index, element indicates total number of ptex - * faces created for preceding base faces. */ + * faces created for preceding base faces. This also stores the final + * ptex offset (the total number of PTex faces) at the end of the array + * so that algorithms can compute the number of ptex faces for a given + * face by computing the delta with the offset for the next face without + * using a separate data structure, e.g.: + * + * const int num_face_ptex_faces = face_ptex_offset[i + 1] - face_ptex_offset[i]; + * + * In total this array has a size of `num base faces + 1`. + */ int *face_ptex_offset; } cache_; } Subdiv; @@ -257,6 +266,9 @@ void BKE_subdiv_displacement_detach(Subdiv *subdiv); /* ============================ TOPOLOGY HELPERS ============================ */ +/* For each element in the array, this stores the total number of ptex faces up to that element, + * with the total number of ptex faces being the last element in the array. The array is of length + * `base face count + 1`. 
*/ int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv); /* =========================== PTEX FACES AND GRIDS ========================= */ diff --git a/source/blender/blenkernel/BKE_subdiv_eval.h b/source/blender/blenkernel/BKE_subdiv_eval.h index 0b61e62c89c..177d5f386a8 100644 --- a/source/blender/blenkernel/BKE_subdiv_eval.h +++ b/source/blender/blenkernel/BKE_subdiv_eval.h @@ -31,15 +31,25 @@ extern "C" { struct Mesh; struct Subdiv; +struct OpenSubdiv_EvaluatorCache; + +typedef enum eSubdivEvaluatorType { + SUBDIV_EVALUATOR_TYPE_CPU, + SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, +} eSubdivEvaluatorType; /* Returns true if evaluator is ready for use. */ -bool BKE_subdiv_eval_begin(struct Subdiv *subdiv); +bool BKE_subdiv_eval_begin(struct Subdiv *subdiv, + eSubdivEvaluatorType evaluator_type, + struct OpenSubdiv_EvaluatorCache *evaluator_cache); /* coarse_vertex_cos is an optional argument which allows to override coordinates of the coarse * mesh. */ bool BKE_subdiv_eval_begin_from_mesh(struct Subdiv *subdiv, const struct Mesh *mesh, - const float (*coarse_vertex_cos)[3]); + const float (*coarse_vertex_cos)[3], + eSubdivEvaluatorType evaluator_type, + struct OpenSubdiv_EvaluatorCache *evaluator_cache); bool BKE_subdiv_eval_refine_from_mesh(struct Subdiv *subdiv, const struct Mesh *mesh, const float (*coarse_vertex_cos)[3]); diff --git a/source/blender/blenkernel/BKE_subdiv_foreach.h b/source/blender/blenkernel/BKE_subdiv_foreach.h index 3f74299455d..f63e23917ef 100644 --- a/source/blender/blenkernel/BKE_subdiv_foreach.h +++ b/source/blender/blenkernel/BKE_subdiv_foreach.h @@ -38,7 +38,8 @@ typedef bool (*SubdivForeachTopologyInformationCb)(const struct SubdivForeachCon const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons); + const int num_polygons, + const int *subdiv_polygon_offset); typedef void (*SubdivForeachVertexFromCornerCb)(const struct SubdivForeachContext *context, void *tls, diff --git 
a/source/blender/blenkernel/BKE_subdiv_modifier.h b/source/blender/blenkernel/BKE_subdiv_modifier.h new file mode 100644 index 00000000000..94068613101 --- /dev/null +++ b/source/blender/blenkernel/BKE_subdiv_modifier.h @@ -0,0 +1,71 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2021 by Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup bke + */ + +#pragma once + +#include "BLI_sys_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct Mesh; +struct Object; +struct Scene; +struct Subdiv; +struct SubdivSettings; +struct SubsurfModifierData; + +void BKE_subsurf_modifier_subdiv_settings_init(struct SubdivSettings *settings, + const struct SubsurfModifierData *smd, + const bool use_render_params); + +/* If skip_check_is_last is true, we assume that the modifier passed is the last enabled modifier + * in the stack. 
*/ +bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const struct Scene *scene, + const struct Object *ob, + const struct SubsurfModifierData *smd, + int required_mode, + bool skip_check_is_last); + +bool BKE_subsurf_modifier_can_do_gpu_subdiv(const struct Scene *scene, + const struct Object *ob, + const int required_mode); + +extern void (*BKE_subsurf_modifier_free_gpu_cache_cb)(struct Subdiv *subdiv); + +struct Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure( + const struct SubsurfModifierData *smd, + const struct SubdivSettings *subdiv_settings, + const struct Mesh *mesh, + const bool for_draw_code); + +struct SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(struct SubsurfModifierData *smd); + +/* Return the #ModifierMode required for the evaluation of the subsurf modifier, which should be + * used to check if the modifier is enabled. */ +int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt index fe33abd17c0..3c780a933d3 100644 --- a/source/blender/blenkernel/CMakeLists.txt +++ b/source/blender/blenkernel/CMakeLists.txt @@ -275,6 +275,7 @@ set(SRC intern/subdiv_eval.c intern/subdiv_foreach.c intern/subdiv_mesh.c + intern/subdiv_modifier.c intern/subdiv_stats.c intern/subdiv_topology.c intern/subsurf_ccg.c @@ -453,6 +454,7 @@ set(SRC BKE_subdiv_eval.h BKE_subdiv_foreach.h BKE_subdiv_mesh.h + BKE_subdiv_modifier.h BKE_subdiv_topology.h BKE_subsurf.h BKE_text.h diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc index da5b4ccc764..47ea55be871 100644 --- a/source/blender/blenkernel/intern/mesh_normals.cc +++ b/source/blender/blenkernel/intern/mesh_normals.cc @@ -319,6 +319,7 @@ void BKE_mesh_ensure_normals(Mesh *mesh) void BKE_mesh_ensure_normals_for_display(Mesh *mesh) { switch ((eMeshWrapperType)mesh->runtime.wrapper_type) { + case 
ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: /* Run code below. */ break; diff --git a/source/blender/blenkernel/intern/mesh_wrapper.c b/source/blender/blenkernel/intern/mesh_wrapper.c index bc1ffeb8cf4..5956f2802b5 100644 --- a/source/blender/blenkernel/intern/mesh_wrapper.c +++ b/source/blender/blenkernel/intern/mesh_wrapper.c @@ -36,6 +36,7 @@ #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" +#include "DNA_modifier_types.h" #include "DNA_object_types.h" #include "BLI_ghash.h" @@ -50,8 +51,14 @@ #include "BKE_mesh.h" #include "BKE_mesh_runtime.h" #include "BKE_mesh_wrapper.h" +#include "BKE_modifier.h" +#include "BKE_object.h" +#include "BKE_subdiv.h" +#include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" #include "DEG_depsgraph.h" +#include "DEG_depsgraph_query.h" Mesh *BKE_mesh_wrapper_from_editmesh_with_coords(BMEditMesh *em, const CustomData_MeshMasks *cd_mask_extra, @@ -106,7 +113,8 @@ static void mesh_wrapper_ensure_mdata_isolated(void *userdata) me->runtime.wrapper_type = ME_WRAPPER_TYPE_MDATA; switch (geom_type_orig) { - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { break; /* Quiet warning. 
*/ } case ME_WRAPPER_TYPE_BMESH: { @@ -157,6 +165,7 @@ bool BKE_mesh_wrapper_minmax(const Mesh *me, float min[3], float max[3]) case ME_WRAPPER_TYPE_BMESH: return BKE_editmesh_cache_calc_minmax(me->edit_mesh, me->runtime.edit_data, min, max); case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return BKE_mesh_minmax(me, min, max); } BLI_assert_unreachable(); @@ -191,7 +200,8 @@ void BKE_mesh_wrapper_vert_coords_copy(const Mesh *me, } return; } - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { BLI_assert(vert_coords_len <= me->totvert); const MVert *mvert = me->mvert; for (int i = 0; i < vert_coords_len; i++) { @@ -228,7 +238,8 @@ void BKE_mesh_wrapper_vert_coords_copy_with_mat4(const Mesh *me, } return; } - case ME_WRAPPER_TYPE_MDATA: { + case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: { BLI_assert(vert_coords_len == me->totvert); const MVert *mvert = me->mvert; for (int i = 0; i < vert_coords_len; i++) { @@ -252,6 +263,7 @@ int BKE_mesh_wrapper_vert_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totvert; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totvert; } BLI_assert_unreachable(); @@ -264,6 +276,7 @@ int BKE_mesh_wrapper_edge_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totedge; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totedge; } BLI_assert_unreachable(); @@ -276,6 +289,7 @@ int BKE_mesh_wrapper_loop_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totloop; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totloop; } BLI_assert_unreachable(); @@ -288,6 +302,7 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me) case ME_WRAPPER_TYPE_BMESH: return me->edit_mesh->bm->totface; case ME_WRAPPER_TYPE_MDATA: + case ME_WRAPPER_TYPE_SUBD: return me->totpoly; } BLI_assert_unreachable(); @@ -295,3 +310,67 @@ int BKE_mesh_wrapper_poly_len(const Mesh *me) } /** \} */ + +/* 
-------------------------------------------------------------------- */ +/** \name CPU Subdivision Evaluation + * \{ */ + +Mesh *BKE_mesh_wrapper_ensure_subdivision(const Object *ob, Mesh *me) +{ + ThreadMutex *mesh_eval_mutex = (ThreadMutex *)me->runtime.eval_mutex; + BLI_mutex_lock(mesh_eval_mutex); + + if (me->runtime.wrapper_type == ME_WRAPPER_TYPE_SUBD) { + BLI_mutex_unlock(mesh_eval_mutex); + return me->runtime.mesh_eval; + } + + SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob); + if (!smd) { + BLI_mutex_unlock(mesh_eval_mutex); + return me; + } + + const bool apply_render = me->runtime.subsurf_apply_render; + + SubdivSettings subdiv_settings; + BKE_subsurf_modifier_subdiv_settings_init(&subdiv_settings, smd, apply_render); + if (subdiv_settings.level == 0) { + return me; + } + + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &subdiv_settings, me, false); + if (subdiv == NULL) { + /* Happens on bad topology, but also on empty input mesh. 
*/ + return me; + } + + SubdivToMeshSettings mesh_settings; + mesh_settings.resolution = me->runtime.subsurf_resolution; + mesh_settings.use_optimal_display = me->runtime.subsurf_use_optimal_display; + + if (mesh_settings.resolution < 3) { + return me; + } + + Mesh *subdiv_mesh = BKE_subdiv_to_mesh(subdiv, &mesh_settings, me); + + if (subdiv != runtime_data->subdiv) { + BKE_subdiv_free(subdiv); + } + + if (subdiv_mesh != me) { + if (me->runtime.mesh_eval != NULL) { + BKE_id_free(NULL, me->runtime.mesh_eval); + } + me->runtime.mesh_eval = subdiv_mesh; + me->runtime.wrapper_type = ME_WRAPPER_TYPE_SUBD; + } + + BLI_mutex_unlock(mesh_eval_mutex); + return me->runtime.mesh_eval; +} + +/** \} */ diff --git a/source/blender/blenkernel/intern/modifier.c b/source/blender/blenkernel/intern/modifier.c index e1d201d7806..f3b6c2544bf 100644 --- a/source/blender/blenkernel/intern/modifier.c +++ b/source/blender/blenkernel/intern/modifier.c @@ -970,6 +970,7 @@ static void modwrap_dependsOnNormals(Mesh *me) } break; } + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: BKE_mesh_calc_normals(me); break; diff --git a/source/blender/blenkernel/intern/multires_reshape_smooth.c b/source/blender/blenkernel/intern/multires_reshape_smooth.c index 3665d01926b..50b4410a28e 100644 --- a/source/blender/blenkernel/intern/multires_reshape_smooth.c +++ b/source/blender/blenkernel/intern/multires_reshape_smooth.c @@ -566,7 +566,8 @@ static bool foreach_topology_info(const SubdivForeachContext *foreach_context, const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons) + const int num_polygons, + const int *UNUSED(subdiv_polygon_offset)) { MultiresReshapeSmoothContext *reshape_smooth_context = foreach_context->user_data; const int max_edges = reshape_smooth_context->smoothing_type == MULTIRES_SUBDIVIDE_LINEAR ? 
@@ -1037,7 +1038,7 @@ static void reshape_subdiv_create(MultiresReshapeSmoothContext *reshape_smooth_c converter_init(reshape_smooth_context, &converter); Subdiv *reshape_subdiv = BKE_subdiv_new_from_converter(settings, &converter); - BKE_subdiv_eval_begin(reshape_subdiv); + BKE_subdiv_eval_begin(reshape_subdiv, SUBDIV_EVALUATOR_TYPE_CPU, NULL); reshape_smooth_context->reshape_subdiv = reshape_subdiv; diff --git a/source/blender/blenkernel/intern/multires_reshape_util.c b/source/blender/blenkernel/intern/multires_reshape_util.c index b7572204182..07a5d7c4a61 100644 --- a/source/blender/blenkernel/intern/multires_reshape_util.c +++ b/source/blender/blenkernel/intern/multires_reshape_util.c @@ -65,7 +65,7 @@ Subdiv *multires_reshape_create_subdiv(Depsgraph *depsgraph, SubdivSettings subdiv_settings; BKE_multires_subdiv_settings_init(&subdiv_settings, mmd); Subdiv *subdiv = BKE_subdiv_new_from_mesh(&subdiv_settings, base_mesh); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { BKE_subdiv_free(subdiv); return NULL; } diff --git a/source/blender/blenkernel/intern/multires_reshape_vertcos.c b/source/blender/blenkernel/intern/multires_reshape_vertcos.c index ed2df1ba8c5..c009349ff1b 100644 --- a/source/blender/blenkernel/intern/multires_reshape_vertcos.c +++ b/source/blender/blenkernel/intern/multires_reshape_vertcos.c @@ -114,7 +114,8 @@ static bool multires_reshape_vertcos_foreach_topology_info( const int num_vertices, const int UNUSED(num_edges), const int UNUSED(num_loops), - const int UNUSED(num_polygons)) + const int UNUSED(num_polygons), + const int *UNUSED(subdiv_polygon_offset)) { MultiresReshapeAssignVertcosContext *reshape_vertcos_context = foreach_context->user_data; if (num_vertices != reshape_vertcos_context->num_vert_coords) { diff --git a/source/blender/blenkernel/intern/multires_versioning.c 
b/source/blender/blenkernel/intern/multires_versioning.c index 4c0d7165cd0..18708c43f26 100644 --- a/source/blender/blenkernel/intern/multires_versioning.c +++ b/source/blender/blenkernel/intern/multires_versioning.c @@ -61,7 +61,7 @@ static Subdiv *subdiv_for_simple_to_catmull_clark(Object *object, MultiresModifi Subdiv *subdiv = BKE_subdiv_new_from_converter(&subdiv_settings, &converter); BKE_subdiv_converter_free(&converter); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh(subdiv, base_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { BKE_subdiv_free(subdiv); return NULL; } diff --git a/source/blender/blenkernel/intern/object.cc b/source/blender/blenkernel/intern/object.cc index 6cc6219b7d7..d08ea74d2c6 100644 --- a/source/blender/blenkernel/intern/object.cc +++ b/source/blender/blenkernel/intern/object.cc @@ -1773,8 +1773,9 @@ static void object_update_from_subsurf_ccg(Object *object) if (!object->runtime.is_data_eval_owned) { return; } - /* Object was never evaluated, so can not have CCG subdivision surface. */ - Mesh *mesh_eval = BKE_object_get_evaluated_mesh(object); + /* Object was never evaluated, so can not have CCG subdivision surface. If it were evaluated, do + * not try to compute OpenSubDiv on the CPU as it is not needed here. */ + Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(object); if (mesh_eval == nullptr) { return; } @@ -4496,7 +4497,7 @@ bool BKE_object_obdata_texspace_get(Object *ob, char **r_texflag, float **r_loc, return true; } -Mesh *BKE_object_get_evaluated_mesh(const Object *object) +Mesh *BKE_object_get_evaluated_mesh_no_subsurf(const Object *object) { /* First attempt to retrieve the evaluated mesh from the evaluated geometry set. Most * object types either store it there or add a reference to it if it's owned elsewhere. 
*/ @@ -4523,6 +4524,20 @@ Mesh *BKE_object_get_evaluated_mesh(const Object *object) return nullptr; } +Mesh *BKE_object_get_evaluated_mesh(const Object *object) +{ + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(object); + if (!mesh) { + return nullptr; + } + + if (object->data && GS(((const ID *)object->data)->name) == ID_ME) { + mesh = BKE_mesh_wrapper_ensure_subdivision(object, mesh); + } + + return mesh; +} + Mesh *BKE_object_get_pre_modified_mesh(const Object *object) { if (object->type == OB_MESH && object->runtime.data_orig != nullptr) { @@ -5779,6 +5794,21 @@ void BKE_object_modifiers_lib_link_common(void *userData, } } +SubsurfModifierData *BKE_object_get_last_subsurf_modifier(const Object *ob) +{ + ModifierData *md = (ModifierData *)(ob->modifiers.last); + + while (md) { + if (md->type == eModifierType_Subsurf) { + break; + } + + md = md->prev; + } + + return (SubsurfModifierData *)(md); +} + void BKE_object_replace_data_on_shallow_copy(Object *ob, ID *new_data) { ob->type = BKE_object_obdata_to_type(new_data); diff --git a/source/blender/blenkernel/intern/subdiv.c b/source/blender/blenkernel/intern/subdiv.c index fd32f52351a..45810e29565 100644 --- a/source/blender/blenkernel/intern/subdiv.c +++ b/source/blender/blenkernel/intern/subdiv.c @@ -29,6 +29,9 @@ #include "BLI_utildefines.h" +#include "BKE_modifier.h" +#include "BKE_subdiv_modifier.h" + #include "MEM_guardedalloc.h" #include "subdiv_converter.h" @@ -189,6 +192,12 @@ Subdiv *BKE_subdiv_update_from_mesh(Subdiv *subdiv, void BKE_subdiv_free(Subdiv *subdiv) { if (subdiv->evaluator != NULL) { + const eOpenSubdivEvaluator evaluator_type = subdiv->evaluator->type; + if (evaluator_type != OPENSUBDIV_EVALUATOR_CPU) { + /* Let the draw code do the freeing, to ensure that the OpenGL context is valid. 
*/ + BKE_subsurf_modifier_free_gpu_cache_cb(subdiv); + return; + } openSubdiv_deleteEvaluator(subdiv->evaluator); } if (subdiv->topology_refiner != NULL) { @@ -214,12 +223,13 @@ int *BKE_subdiv_face_ptex_offset_get(Subdiv *subdiv) } const int num_coarse_faces = topology_refiner->getNumFaces(topology_refiner); subdiv->cache_.face_ptex_offset = MEM_malloc_arrayN( - num_coarse_faces, sizeof(int), "subdiv face_ptex_offset"); + num_coarse_faces + 1, sizeof(int), "subdiv face_ptex_offset"); int ptex_offset = 0; for (int face_index = 0; face_index < num_coarse_faces; face_index++) { const int num_ptex_faces = topology_refiner->getNumFacePtexFaces(topology_refiner, face_index); subdiv->cache_.face_ptex_offset[face_index] = ptex_offset; ptex_offset += num_ptex_faces; } + subdiv->cache_.face_ptex_offset[num_coarse_faces] = ptex_offset; return subdiv->cache_.face_ptex_offset; } diff --git a/source/blender/blenkernel/intern/subdiv_ccg.c b/source/blender/blenkernel/intern/subdiv_ccg.c index 77962ec924c..7d876acf776 100644 --- a/source/blender/blenkernel/intern/subdiv_ccg.c +++ b/source/blender/blenkernel/intern/subdiv_ccg.c @@ -603,7 +603,8 @@ Mesh *BKE_subdiv_to_ccg_mesh(Subdiv *subdiv, { /* Make sure evaluator is ready. 
*/ BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_CCG); - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { if (coarse_mesh->totpoly) { return NULL; } diff --git a/source/blender/blenkernel/intern/subdiv_deform.c b/source/blender/blenkernel/intern/subdiv_deform.c index 7a2d639e4e5..c385b1b291d 100644 --- a/source/blender/blenkernel/intern/subdiv_deform.c +++ b/source/blender/blenkernel/intern/subdiv_deform.c @@ -117,7 +117,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex const int UNUSED(num_vertices), const int UNUSED(num_edges), const int UNUSED(num_loops), - const int UNUSED(num_polygons)) + const int UNUSED(num_polygons), + const int *UNUSED(subdiv_polygon_offset)) { SubdivDeformContext *subdiv_context = foreach_context->user_data; subdiv_mesh_prepare_accumulator(subdiv_context, subdiv_context->coarse_mesh->totvert); @@ -202,7 +203,8 @@ void BKE_subdiv_deform_coarse_vertices(struct Subdiv *subdiv, BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH); /* Make sure evaluator is up to date with possible new topology, and that * is refined for the new positions of coarse vertices. */ - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, vertex_cos)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, vertex_cos, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { /* This could happen in two situations: * - OpenSubdiv is disabled. 
* - Something totally bad happened, and OpenSubdiv rejected our diff --git a/source/blender/blenkernel/intern/subdiv_eval.c b/source/blender/blenkernel/intern/subdiv_eval.c index 0001eb8a205..9733a1498a6 100644 --- a/source/blender/blenkernel/intern/subdiv_eval.c +++ b/source/blender/blenkernel/intern/subdiv_eval.c @@ -28,6 +28,7 @@ #include "BLI_bitmap.h" #include "BLI_math_vector.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "BKE_customdata.h" @@ -38,7 +39,28 @@ #include "opensubdiv_evaluator_capi.h" #include "opensubdiv_topology_refiner_capi.h" -bool BKE_subdiv_eval_begin(Subdiv *subdiv) +/* ============================ Helper Function ============================ */ + +static eOpenSubdivEvaluator opensubdiv_evalutor_from_subdiv_evaluator_type( + eSubdivEvaluatorType evaluator_type) +{ + switch (evaluator_type) { + case SUBDIV_EVALUATOR_TYPE_CPU: { + return OPENSUBDIV_EVALUATOR_CPU; + } + case SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE: { + return OPENSUBDIV_EVALUATOR_GLSL_COMPUTE; + } + } + BLI_assert_msg(0, "Unknown evaluator type"); + return OPENSUBDIV_EVALUATOR_CPU; +} + +/* ====================== Main Subdivision Evaluation ====================== */ + +bool BKE_subdiv_eval_begin(Subdiv *subdiv, + eSubdivEvaluatorType evaluator_type, + OpenSubdiv_EvaluatorCache *evaluator_cache) { BKE_subdiv_stats_reset(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); if (subdiv->topology_refiner == NULL) { @@ -47,8 +69,11 @@ bool BKE_subdiv_eval_begin(Subdiv *subdiv) return false; } if (subdiv->evaluator == NULL) { + eOpenSubdivEvaluator opensubdiv_evaluator_type = + opensubdiv_evalutor_from_subdiv_evaluator_type(evaluator_type); BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); - subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner(subdiv->topology_refiner); + subdiv->evaluator = openSubdiv_createEvaluatorFromTopologyRefiner( + subdiv->topology_refiner, opensubdiv_evaluator_type, evaluator_cache); 
BKE_subdiv_stats_end(&subdiv->stats, SUBDIV_STATS_EVALUATOR_CREATE); if (subdiv->evaluator == NULL) { return false; @@ -80,6 +105,9 @@ static void set_coarse_positions(Subdiv *subdiv, BLI_BITMAP_ENABLE(vertex_used_map, loop->v); } } + /* Use a temporary buffer so we do not upload vertices one at a time to the GPU. */ + float(*buffer)[3] = MEM_mallocN(sizeof(float[3]) * mesh->totvert, "subdiv tmp coarse positions"); + int manifold_vertex_count = 0; for (int vertex_index = 0, manifold_vertex_index = 0; vertex_index < mesh->totvert; vertex_index++) { if (!BLI_BITMAP_TEST_BOOL(vertex_used_map, vertex_index)) { @@ -93,13 +121,49 @@ static void set_coarse_positions(Subdiv *subdiv, const MVert *vertex = &mvert[vertex_index]; vertex_co = vertex->co; } - subdiv->evaluator->setCoarsePositions(subdiv->evaluator, vertex_co, manifold_vertex_index, 1); + copy_v3_v3(&buffer[manifold_vertex_index][0], vertex_co); manifold_vertex_index++; + manifold_vertex_count++; } + subdiv->evaluator->setCoarsePositions( + subdiv->evaluator, &buffer[0][0], 0, manifold_vertex_count); MEM_freeN(vertex_used_map); + MEM_freeN(buffer); +} + +/* Context which is used to fill face varying data in parallel. 
*/ +typedef struct FaceVaryingDataFromUVContext { + OpenSubdiv_TopologyRefiner *topology_refiner; + const Mesh *mesh; + const MLoopUV *mloopuv; + float (*buffer)[2]; + int layer_index; +} FaceVaryingDataFromUVContext; + +static void set_face_varying_data_from_uv_task(void *__restrict userdata, + const int face_index, + const TaskParallelTLS *__restrict UNUSED(tls)) +{ + FaceVaryingDataFromUVContext *ctx = userdata; + OpenSubdiv_TopologyRefiner *topology_refiner = ctx->topology_refiner; + const int layer_index = ctx->layer_index; + const Mesh *mesh = ctx->mesh; + const MPoly *mpoly = &mesh->mpoly[face_index]; + const MLoopUV *mluv = &ctx->mloopuv[mpoly->loopstart]; + + /* TODO(sergey): OpenSubdiv's C-API converter can change winding of + * loops of a face, need to watch for that, to prevent wrong UVs assigned. + */ + const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner, face_index); + const int *uv_indices = topology_refiner->getFaceFVarValueIndices( + topology_refiner, face_index, layer_index); + for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) { + copy_v2_v2(ctx->buffer[uv_indices[vertex_index]], mluv->uv); + } } static void set_face_varying_data_from_uv(Subdiv *subdiv, + const Mesh *mesh, const MLoopUV *mloopuv, const int layer_index) { @@ -107,25 +171,37 @@ static void set_face_varying_data_from_uv(Subdiv *subdiv, OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; const int num_faces = topology_refiner->getNumFaces(topology_refiner); const MLoopUV *mluv = mloopuv; - /* TODO(sergey): OpenSubdiv's C-API converter can change winding of - * loops of a face, need to watch for that, to prevent wrong UVs assigned. 
- */ - for (int face_index = 0; face_index < num_faces; face_index++) { - const int num_face_vertices = topology_refiner->getNumFaceVertices(topology_refiner, - face_index); - const int *uv_indices = topology_refiner->getFaceFVarValueIndices( - topology_refiner, face_index, layer_index); - for (int vertex_index = 0; vertex_index < num_face_vertices; vertex_index++, mluv++) { - evaluator->setFaceVaryingData(evaluator, layer_index, mluv->uv, uv_indices[vertex_index], 1); - } - } + + const int num_fvar_values = topology_refiner->getNumFVarValues(topology_refiner, layer_index); + /* Use a temporary buffer so we do not upload UVs one at a time to the GPU. */ + float(*buffer)[2] = MEM_mallocN(sizeof(float[2]) * num_fvar_values, "temp UV storage"); + + FaceVaryingDataFromUVContext ctx; + ctx.topology_refiner = topology_refiner; + ctx.layer_index = layer_index; + ctx.mloopuv = mluv; + ctx.mesh = mesh; + ctx.buffer = buffer; + + TaskParallelSettings parallel_range_settings; + BLI_parallel_range_settings_defaults(¶llel_range_settings); + parallel_range_settings.min_iter_per_thread = 1; + + BLI_task_parallel_range( + 0, num_faces, &ctx, set_face_varying_data_from_uv_task, ¶llel_range_settings); + + evaluator->setFaceVaryingData(evaluator, layer_index, &buffer[0][0], 0, num_fvar_values); + + MEM_freeN(buffer); } bool BKE_subdiv_eval_begin_from_mesh(Subdiv *subdiv, const Mesh *mesh, - const float (*coarse_vertex_cos)[3]) + const float (*coarse_vertex_cos)[3], + eSubdivEvaluatorType evaluator_type, + OpenSubdiv_EvaluatorCache *evaluator_cache) { - if (!BKE_subdiv_eval_begin(subdiv)) { + if (!BKE_subdiv_eval_begin(subdiv, evaluator_type, evaluator_cache)) { return false; } return BKE_subdiv_eval_refine_from_mesh(subdiv, mesh, coarse_vertex_cos); @@ -146,7 +222,7 @@ bool BKE_subdiv_eval_refine_from_mesh(Subdiv *subdiv, const int num_uv_layers = CustomData_number_of_layers(&mesh->ldata, CD_MLOOPUV); for (int layer_index = 0; layer_index < num_uv_layers; layer_index++) { const 
MLoopUV *mloopuv = CustomData_get_layer_n(&mesh->ldata, CD_MLOOPUV, layer_index); - set_face_varying_data_from_uv(subdiv, mloopuv, layer_index); + set_face_varying_data_from_uv(subdiv, mesh, mloopuv, layer_index); } /* Update evaluator to the new coarse geometry. */ BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_EVALUATOR_REFINE); diff --git a/source/blender/blenkernel/intern/subdiv_foreach.c b/source/blender/blenkernel/intern/subdiv_foreach.c index 061c196df2a..69bead27fe6 100644 --- a/source/blender/blenkernel/intern/subdiv_foreach.c +++ b/source/blender/blenkernel/intern/subdiv_foreach.c @@ -1877,7 +1877,8 @@ bool BKE_subdiv_foreach_subdiv_geometry(Subdiv *subdiv, ctx.num_subdiv_vertices, ctx.num_subdiv_edges, ctx.num_subdiv_loops, - ctx.num_subdiv_polygons)) { + ctx.num_subdiv_polygons, + ctx.subdiv_polygon_offset)) { subdiv_foreach_ctx_free(&ctx); return false; } diff --git a/source/blender/blenkernel/intern/subdiv_mesh.c b/source/blender/blenkernel/intern/subdiv_mesh.c index e5c7d13edab..1f31d0543ad 100644 --- a/source/blender/blenkernel/intern/subdiv_mesh.c +++ b/source/blender/blenkernel/intern/subdiv_mesh.c @@ -514,7 +514,8 @@ static bool subdiv_mesh_topology_info(const SubdivForeachContext *foreach_contex const int num_vertices, const int num_edges, const int num_loops, - const int num_polygons) + const int num_polygons, + const int *UNUSED(subdiv_polygon_offset)) { /* Multires grid data will be applied or become invalid after subdivision, * so don't try to preserve it and use memory. */ @@ -1193,7 +1194,8 @@ Mesh *BKE_subdiv_to_mesh(Subdiv *subdiv, BKE_subdiv_stats_begin(&subdiv->stats, SUBDIV_STATS_SUBDIV_TO_MESH); /* Make sure evaluator is up to date with possible new topology, and that * it is refined for the new positions of coarse vertices. 
*/ - if (!BKE_subdiv_eval_begin_from_mesh(subdiv, coarse_mesh, NULL)) { + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, coarse_mesh, NULL, SUBDIV_EVALUATOR_TYPE_CPU, NULL)) { /* This could happen in two situations: * - OpenSubdiv is disabled. * - Something totally bad happened, and OpenSubdiv rejected our diff --git a/source/blender/blenkernel/intern/subdiv_modifier.c b/source/blender/blenkernel/intern/subdiv_modifier.c new file mode 100644 index 00000000000..bafcb631f59 --- /dev/null +++ b/source/blender/blenkernel/intern/subdiv_modifier.c @@ -0,0 +1,162 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2021 by Blender Foundation. + * All rights reserved. + */ + +#include "BKE_subdiv_modifier.h" + +#include "MEM_guardedalloc.h" + +#include "DNA_mesh_types.h" +#include "DNA_modifier_types.h" +#include "DNA_object_types.h" +#include "DNA_scene_types.h" +#include "DNA_userdef_types.h" + +#include "BKE_modifier.h" +#include "BKE_subdiv.h" + +#include "GPU_capabilities.h" +#include "GPU_context.h" + +#include "opensubdiv_capi.h" + +void BKE_subsurf_modifier_subdiv_settings_init(SubdivSettings *settings, + const SubsurfModifierData *smd, + const bool use_render_params) +{ + const int requested_levels = (use_render_params) ? 
smd->renderLevels : smd->levels; + + settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE); + settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision); + settings->level = settings->is_simple ? + 1 : + (settings->is_adaptive ? smd->quality : requested_levels); + settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease); + settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf( + smd->boundary_smooth); + settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth( + smd->uv_smooth); +} + +static ModifierData *modifier_get_last_enabled_for_mode(const Scene *scene, + const Object *ob, + int required_mode) +{ + ModifierData *md = ob->modifiers.last; + + while (md) { + if (BKE_modifier_is_enabled(scene, md, required_mode)) { + break; + } + + md = md->prev; + } + + return md; +} + +bool BKE_subsurf_modifier_can_do_gpu_subdiv_ex(const Scene *scene, + const Object *ob, + const SubsurfModifierData *smd, + int required_mode, + bool skip_check_is_last) +{ + if ((U.gpu_flag & USER_GPU_FLAG_SUBDIVISION_EVALUATION) == 0) { + return false; + } + + if (!skip_check_is_last) { + ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode); + if (md != (const ModifierData *)smd) { + return false; + } + } + + /* Only OpenGL is supported for OpenSubdiv evaluation for now. 
*/ + if (GPU_backend_get_type() != GPU_BACKEND_OPENGL) { + return false; + } + + if (!GPU_compute_shader_support()) { + return false; + } + + const int available_evaluators = openSubdiv_getAvailableEvaluators(); + if ((available_evaluators & OPENSUBDIV_EVALUATOR_GLSL_COMPUTE) == 0) { + return false; + } + + return true; +} + +bool BKE_subsurf_modifier_can_do_gpu_subdiv(const Scene *scene, + const Object *ob, + int required_mode) +{ + ModifierData *md = modifier_get_last_enabled_for_mode(scene, ob, required_mode); + + if (!md) { + return false; + } + + if (md->type != eModifierType_Subsurf) { + return false; + } + + return BKE_subsurf_modifier_can_do_gpu_subdiv_ex( + scene, ob, (SubsurfModifierData *)md, required_mode, true); +} + +void (*BKE_subsurf_modifier_free_gpu_cache_cb)(Subdiv *subdiv) = NULL; + +/* Main goal of this function is to give usable subdivision surface descriptor + * which matches settings and topology. */ +Subdiv *BKE_subsurf_modifier_subdiv_descriptor_ensure(const SubsurfModifierData *smd, + const SubdivSettings *subdiv_settings, + const Mesh *mesh, + const bool for_draw_code) +{ + SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; + if (runtime_data->subdiv && runtime_data->set_by_draw_code != for_draw_code) { + BKE_subdiv_free(runtime_data->subdiv); + runtime_data->subdiv = NULL; + } + Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh); + runtime_data->subdiv = subdiv; + runtime_data->set_by_draw_code = for_draw_code; + return subdiv; +} + +SubsurfRuntimeData *BKE_subsurf_modifier_ensure_runtime(SubsurfModifierData *smd) +{ + SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; + if (runtime_data == NULL) { + runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime"); + smd->modifier.runtime = runtime_data; + } + return runtime_data; +} + +int BKE_subsurf_modifier_eval_required_mode(bool is_final_render, bool is_edit_mode) +{ + if 
(is_final_render) { + return eModifierMode_Render; + } + + return eModifierMode_Realtime | (is_edit_mode ? eModifierMode_Editmode : 0); +} diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 821b6025fff..eea3adc440a 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -44,9 +44,11 @@ set(INC ../../../intern/atomic ../../../intern/glew-mx ../../../intern/guardedalloc + ../../../intern/opensubdiv # dna_type_offsets.h ${CMAKE_CURRENT_BINARY_DIR}/../makesdna/intern + ${OPENSUBDIV_INCLUDE_DIRS} ) set(SRC @@ -91,6 +93,7 @@ set(SRC intern/draw_cache_impl_metaball.c intern/draw_cache_impl_particles.c intern/draw_cache_impl_pointcloud.c + intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc intern/draw_common.c @@ -209,6 +212,7 @@ set(SRC intern/draw_manager_testing.h intern/draw_manager_text.h intern/draw_shader.h + intern/draw_subdivision.h intern/draw_texture_pool.h intern/draw_view.h intern/draw_view_data.h @@ -372,6 +376,18 @@ data_to_c_simple(intern/shaders/common_view_lib.glsl SRC) data_to_c_simple(intern/shaders/common_fxaa_lib.glsl SRC) data_to_c_simple(intern/shaders/common_smaa_lib.glsl SRC) data_to_c_simple(intern/shaders/common_fullscreen_vert.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_custom_data_interp_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_ibo_lines_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_ibo_tris_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_lib.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_normals_accumulate_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_normals_finalize_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_patch_evaluation_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl SRC) 
+data_to_c_simple(intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_lnor_comp.glsl SRC) +data_to_c_simple(intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl SRC) data_to_c_simple(engines/gpencil/shaders/gpencil_frag.glsl SRC) data_to_c_simple(engines/gpencil/shaders/gpencil_vert.glsl SRC) diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h index 98e166ac3a7..132f66ecb1e 100644 --- a/source/blender/draw/DRW_engine.h +++ b/source/blender/draw/DRW_engine.h @@ -191,6 +191,10 @@ void DRW_xr_drawing_end(void); /* For garbage collection */ void DRW_cache_free_old_batches(struct Main *bmain); +void DRW_cache_free_old_subdiv(void); + +/* For the OpenGL evaluators and garbage collected subdivision data. */ +void DRW_subdiv_free(void); /* Never use this. Only for closing blender. */ void DRW_opengl_context_enable_ex(bool restore); diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c index 2345a110134..a754e81b949 100644 --- a/source/blender/draw/engines/overlay/overlay_armature.c +++ b/source/blender/draw/engines/overlay/overlay_armature.c @@ -589,7 +589,7 @@ static void drw_shgroup_bone_custom_wire(ArmatureDrawContext *ctx, Object *custom) { /* See comments in #drw_shgroup_bone_custom_solid. 
*/ - Mesh *mesh = BKE_object_get_evaluated_mesh(custom); + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(custom); if (mesh == NULL) { return; } diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 03fb3b92277..1110658e3b2 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -923,7 +923,7 @@ GPUBatch *DRW_cache_object_surface_get(Object *ob) GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob) { - Mesh *me = BKE_object_get_evaluated_mesh(ob); + Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob); short type = (me != NULL) ? OB_MESH : ob->type; switch (type) { @@ -950,7 +950,7 @@ int DRW_cache_object_material_count_get(struct Object *ob) { short type = ob->type; - Mesh *me = BKE_object_get_evaluated_mesh(ob); + Mesh *me = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (me != NULL && type != OB_POINTCLOUD) { /* Some object types can have one data type in ob->data, but will be rendered as mesh. * For point clouds this never happens. 
Ideally this check would happen at another level @@ -3021,7 +3021,7 @@ GPUBatch *DRW_cache_surf_surface_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_surface(mesh_eval); } @@ -3034,7 +3034,7 @@ GPUBatch *DRW_cache_surf_edge_wire_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_loose_edges(mesh_eval); } @@ -3047,7 +3047,7 @@ GPUBatch *DRW_cache_surf_face_wireframe_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_wireframes_face(mesh_eval); } @@ -3059,7 +3059,7 @@ GPUBatch *DRW_cache_surf_edge_detection_get(Object *ob, bool *r_is_manifold) { BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_edge_detection(mesh_eval, r_is_manifold); } @@ -3072,7 +3072,7 @@ GPUBatch *DRW_cache_surf_loose_edges_get(Object *ob) BLI_assert(ob->type == OB_SURF); struct Curve *cu = ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_loose_edges(mesh_eval); } @@ -3089,7 +3089,7 @@ GPUBatch **DRW_cache_surf_surface_shaded_get(Object *ob, BLI_assert(ob->type == OB_SURF); struct Curve *cu = 
ob->data; - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); if (mesh_eval != NULL) { return DRW_mesh_batch_cache_get_surface_shaded(mesh_eval, gpumat_array, gpumat_array_len); } @@ -3382,7 +3382,7 @@ GPUBatch *DRW_cache_cursor_get(bool crosshair_lines) void drw_batch_cache_validate(Object *ob) { - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: DRW_mesh_batch_cache_validate((Mesh *)ob->data); @@ -3431,7 +3431,7 @@ void drw_batch_cache_generate_requested(Object *ob) DRW_object_use_hide_faces(ob)) || ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob)))); - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: DRW_mesh_batch_cache_create_requested( @@ -3470,7 +3470,7 @@ void drw_batch_cache_generate_requested_evaluated_mesh(Object *ob) DRW_object_use_hide_faces(ob)) || ((mode == CTX_MODE_EDIT_MESH) && DRW_object_is_in_edit_mode(ob)))); - Mesh *mesh = BKE_object_get_evaluated_mesh(ob); + Mesh *mesh = BKE_object_get_evaluated_mesh_no_subsurf(ob); DRW_mesh_batch_cache_create_requested(DST.task_graph, ob, mesh, scene, is_paint_mode, use_hide); } @@ -3481,7 +3481,7 @@ void drw_batch_cache_generate_requested_delayed(Object *ob) void DRW_batch_cache_free_old(Object *ob, int ctime) { - struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); + struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh_no_subsurf(ob); switch (ob->type) { case OB_MESH: diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h index ba42cdf66e7..6de9788b434 100644 --- a/source/blender/draw/intern/draw_cache_extract.h +++ b/source/blender/draw/intern/draw_cache_extract.h @@ -22,6 +22,7 @@ #pragma once +struct 
DRWSubdivCache; struct TaskGraph; #include "DNA_customdata_types.h" @@ -244,6 +245,13 @@ typedef enum DRWBatchFlag { BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of bit fields"); +typedef struct MeshExtractLooseGeom { + int edge_len; + int vert_len; + int *verts; + int *edges; +} MeshExtractLooseGeom; + /** * Data that are kept around between extractions to reduce rebuilding time. * @@ -252,12 +260,7 @@ BLI_STATIC_ASSERT(MBC_BATCH_LEN < 32, "Number of batches exceeded the limit of b typedef struct MeshBufferCache { MeshBufferList buff; - struct { - int edge_len; - int vert_len; - int *verts; - int *edges; - } loose_geom; + MeshExtractLooseGeom loose_geom; struct { int *tri_first_index; @@ -283,6 +286,8 @@ typedef struct MeshBatchCache { GPUBatch **surface_per_mat; + struct DRWSubdivCache *subdiv_cache; + DRWBatchFlag batch_requested; /* DRWBatchFlag */ DRWBatchFlag batch_ready; /* DRWBatchFlag */ @@ -332,9 +337,14 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, const bool do_uvedit, const bool use_subsurf_fdots, const Scene *scene, - const ToolSettings *ts, + const struct ToolSettings *ts, const bool use_hide); +void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + struct DRWSubdivCache *subdiv_cache, + const struct ToolSettings *ts); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 485b803310c..383a3b05b67 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -42,6 +42,7 @@ #include "draw_cache_extract.h" #include "draw_cache_inline.h" +#include "draw_subdivision.h" #include "mesh_extractors/extract_mesh.h" @@ -783,6 +784,99 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, /** \} */ +/* 
---------------------------------------------------------------------- */ +/** \name Subdivision Extract Loop + * \{ */ + +static void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + DRWSubdivCache *subdiv_cache, + const ToolSettings *ts) +{ + /* Create an array containing all the extractors that needs to be executed. */ + ExtractorRunDatas extractors; + + MeshBufferList *mbuflist = &mbc->buff; + +#define EXTRACT_ADD_REQUESTED(type, name) \ + do { \ + if (DRW_##type##_requested(mbuflist->type.name)) { \ + const MeshExtract *extractor = &extract_##name; \ + extractors.append(extractor); \ + } \ + } while (0) + + /* The order in which extractors are added to the list matters somewhat, as some buffers are + * reused when building others. */ + EXTRACT_ADD_REQUESTED(ibo, tris); + EXTRACT_ADD_REQUESTED(vbo, pos_nor); + EXTRACT_ADD_REQUESTED(vbo, lnor); + for (int i = 0; i < GPU_MAX_ATTR; i++) { + EXTRACT_ADD_REQUESTED(vbo, attr[i]); + } + + /* We use only one extractor for face dots, as the work is done in a single compute shader. */ + if (DRW_vbo_requested(mbuflist->vbo.fdots_nor) || DRW_vbo_requested(mbuflist->vbo.fdots_pos) || + DRW_ibo_requested(mbuflist->ibo.fdots)) { + extractors.append(&extract_fdots_pos); + } + + EXTRACT_ADD_REQUESTED(ibo, lines); + EXTRACT_ADD_REQUESTED(ibo, edituv_points); + EXTRACT_ADD_REQUESTED(ibo, edituv_tris); + EXTRACT_ADD_REQUESTED(ibo, edituv_lines); + EXTRACT_ADD_REQUESTED(vbo, vert_idx); + EXTRACT_ADD_REQUESTED(vbo, edge_idx); + EXTRACT_ADD_REQUESTED(vbo, poly_idx); + EXTRACT_ADD_REQUESTED(vbo, edge_fac); + EXTRACT_ADD_REQUESTED(ibo, points); + EXTRACT_ADD_REQUESTED(vbo, edit_data); + EXTRACT_ADD_REQUESTED(vbo, edituv_data); + /* Make sure UVs are computed before edituv stuffs. 
*/ + EXTRACT_ADD_REQUESTED(vbo, uv); + EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_area); + EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle); + EXTRACT_ADD_REQUESTED(ibo, lines_adjacency); + EXTRACT_ADD_REQUESTED(vbo, vcol); + EXTRACT_ADD_REQUESTED(vbo, weights); + EXTRACT_ADD_REQUESTED(vbo, sculpt_data); + +#undef EXTRACT_ADD_REQUESTED + + if (extractors.is_empty()) { + return; + } + + MeshRenderData mr; + draw_subdiv_init_mesh_render_data(subdiv_cache, &mr, ts); + mesh_render_data_update_loose_geom(&mr, mbc, MR_ITER_LEDGE | MR_ITER_LVERT, MR_DATA_LOOSE_GEOM); + + void *data_stack = MEM_mallocN(extractors.data_size_total(), __func__); + uint32_t data_offset = 0; + for (const ExtractorRunData &run_data : extractors) { + const MeshExtract *extractor = run_data.extractor; + void *buffer = mesh_extract_buffer_get(extractor, mbuflist); + void *data = POINTER_OFFSET(data_stack, data_offset); + + extractor->init_subdiv(subdiv_cache, &mr, cache, buffer, data); + + if (extractor->iter_subdiv) { + extractor->iter_subdiv(subdiv_cache, &mr, data); + } + + if (extractor->iter_loose_geom_subdiv) { + extractor->iter_loose_geom_subdiv(subdiv_cache, &mr, &mbc->loose_geom, buffer, data); + } + + if (extractor->finish_subdiv) { + extractor->finish_subdiv(subdiv_cache, buffer, data); + } + } + MEM_freeN(data_stack); +} + +/** \} */ + } // namespace blender::draw extern "C" { @@ -818,4 +912,12 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, use_hide); } +void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, + MeshBufferCache *mbc, + DRWSubdivCache *subdiv_cache, + const ToolSettings *ts) +{ + blender::draw::mesh_buffer_cache_create_requested_subdiv(cache, mbc, subdiv_cache, ts); +} + } // extern "C" diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c index 82b3b5aee41..1e5ffc14911 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.c +++ 
b/source/blender/draw/intern/draw_cache_impl_mesh.c @@ -54,6 +54,7 @@ #include "BKE_object_deform.h" #include "BKE_paint.h" #include "BKE_pbvh.h" +#include "BKE_subdiv_modifier.h" #include "atomic_ops.h" @@ -69,6 +70,7 @@ #include "draw_cache_extract.h" #include "draw_cache_inline.h" +#include "draw_subdivision.h" #include "draw_cache_impl.h" /* own include */ @@ -380,6 +382,7 @@ static void drw_mesh_attributes_add_request(DRW_MeshAttributes *attrs, BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->ldata; break; @@ -395,6 +398,7 @@ BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->pdata; break; @@ -410,6 +414,7 @@ BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->edata; break; @@ -425,6 +430,7 @@ BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) { switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: case ME_WRAPPER_TYPE_MDATA: return &me->vdata; break; @@ -1037,6 +1043,15 @@ static void mesh_buffer_cache_clear(MeshBufferCache *mbc) mbc->poly_sorted.visible_tri_len = 0; } +static void mesh_batch_cache_free_subdiv_cache(MeshBatchCache *cache) +{ + if (cache->subdiv_cache) { + draw_subdiv_cache_free(cache->subdiv_cache); + MEM_freeN(cache->subdiv_cache); + cache->subdiv_cache = NULL; + } +} + static void mesh_batch_cache_clear(Mesh *me) { MeshBatchCache *cache = 
me->runtime.batch_cache; @@ -1064,6 +1079,8 @@ static void mesh_batch_cache_clear(Mesh *me) cache->batch_ready = 0; drw_mesh_weight_state_clear(&cache->weight_state); + + mesh_batch_cache_free_subdiv_cache(cache); } void DRW_mesh_batch_cache_free(Mesh *me) @@ -1693,6 +1710,10 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, const bool do_uvcage = is_editmode && !me->edit_mesh->mesh_eval_final->runtime.is_original; + const int required_mode = BKE_subsurf_modifier_eval_required_mode(DRW_state_is_scene_render(), + is_editmode); + const bool do_subdivision = BKE_subsurf_modifier_can_do_gpu_subdiv(scene, ob, required_mode); + MeshBufferList *mbuflist = &cache->final.buff; /* Initialize batches and request VBO's & IBO's. */ @@ -2038,6 +2059,15 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, true); } + if (do_subdivision) { + DRW_create_subdivision(scene, ob, me, cache, &cache->final, ts); + } + else { + /* The subsurf modifier may have been recently removed, or another modifier was added after it, + * so free any potential subdivision cache as it is not needed anymore. */ + mesh_batch_cache_free_subdiv_cache(cache); + } + mesh_buffer_cache_create_requested(task_graph, cache, &cache->final, diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc new file mode 100644 index 00000000000..5533130212e --- /dev/null +++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc @@ -0,0 +1,1932 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2021, Blender Foundation. + */ + +#include "draw_subdivision.h" + +#include "DNA_mesh_types.h" +#include "DNA_object_types.h" +#include "DNA_scene_types.h" + +#include "BKE_editmesh.h" +#include "BKE_modifier.h" +#include "BKE_object.h" +#include "BKE_scene.h" +#include "BKE_subdiv.h" +#include "BKE_subdiv_eval.h" +#include "BKE_subdiv_foreach.h" +#include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" + +#include "BLI_linklist.h" + +#include "BLI_string.h" + +#include "PIL_time.h" + +#include "DRW_engine.h" +#include "DRW_render.h" + +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_index_buffer.h" +#include "GPU_state.h" +#include "GPU_vertex_buffer.h" + +#include "opensubdiv_capi.h" +#include "opensubdiv_capi_type.h" +#include "opensubdiv_converter_capi.h" +#include "opensubdiv_evaluator_capi.h" +#include "opensubdiv_topology_refiner_capi.h" + +#include "draw_cache_extract.h" +#include "draw_cache_impl.h" +#include "draw_cache_inline.h" +#include "mesh_extractors/extract_mesh.h" + +extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[]; +extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[]; +extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[]; +extern "C" char datatoc_common_subdiv_lib_glsl[]; +extern "C" char datatoc_common_subdiv_normals_accumulate_comp_glsl[]; +extern "C" char datatoc_common_subdiv_normals_finalize_comp_glsl[]; +extern "C" char datatoc_common_subdiv_patch_evaluation_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_edge_fac_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_lnor_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_sculpt_data_comp_glsl[]; +extern "C" char 
datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl[]; +extern "C" char datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl[]; + +enum { + SHADER_BUFFER_LINES, + SHADER_BUFFER_LINES_LOOSE, + SHADER_BUFFER_EDGE_FAC, + SHADER_BUFFER_LNOR, + SHADER_BUFFER_TRIS, + SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS, + SHADER_BUFFER_NORMALS_ACCUMULATE, + SHADER_BUFFER_NORMALS_FINALIZE, + SHADER_PATCH_EVALUATION, + SHADER_PATCH_EVALUATION_LIMIT_NORMALS, + SHADER_PATCH_EVALUATION_FVAR, + SHADER_PATCH_EVALUATION_FACE_DOTS, + SHADER_COMP_CUSTOM_DATA_INTERP_1D, + SHADER_COMP_CUSTOM_DATA_INTERP_2D, + SHADER_COMP_CUSTOM_DATA_INTERP_3D, + SHADER_COMP_CUSTOM_DATA_INTERP_4D, + SHADER_BUFFER_SCULPT_DATA, + SHADER_BUFFER_UV_STRETCH_ANGLE, + SHADER_BUFFER_UV_STRETCH_AREA, + + NUM_SHADERS, +}; + +static GPUShader *g_subdiv_shaders[NUM_SHADERS]; + +static const char *get_shader_code(int shader_type) +{ + switch (shader_type) { + case SHADER_BUFFER_LINES: + case SHADER_BUFFER_LINES_LOOSE: { + return datatoc_common_subdiv_ibo_lines_comp_glsl; + } + case SHADER_BUFFER_EDGE_FAC: { + return datatoc_common_subdiv_vbo_edge_fac_comp_glsl; + } + case SHADER_BUFFER_LNOR: { + return datatoc_common_subdiv_vbo_lnor_comp_glsl; + } + case SHADER_BUFFER_TRIS: + case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: { + return datatoc_common_subdiv_ibo_tris_comp_glsl; + } + case SHADER_BUFFER_NORMALS_ACCUMULATE: { + return datatoc_common_subdiv_normals_accumulate_comp_glsl; + } + case SHADER_BUFFER_NORMALS_FINALIZE: { + return datatoc_common_subdiv_normals_finalize_comp_glsl; + } + case SHADER_PATCH_EVALUATION: + case SHADER_PATCH_EVALUATION_LIMIT_NORMALS: + case SHADER_PATCH_EVALUATION_FVAR: + case SHADER_PATCH_EVALUATION_FACE_DOTS: { + return datatoc_common_subdiv_patch_evaluation_comp_glsl; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_1D: + case SHADER_COMP_CUSTOM_DATA_INTERP_2D: + case SHADER_COMP_CUSTOM_DATA_INTERP_3D: + case SHADER_COMP_CUSTOM_DATA_INTERP_4D: { + return 
datatoc_common_subdiv_custom_data_interp_comp_glsl; + } + case SHADER_BUFFER_SCULPT_DATA: { + return datatoc_common_subdiv_vbo_sculpt_data_comp_glsl; + } + case SHADER_BUFFER_UV_STRETCH_ANGLE: { + return datatoc_common_subdiv_vbo_edituv_strech_angle_comp_glsl; + } + case SHADER_BUFFER_UV_STRETCH_AREA: { + return datatoc_common_subdiv_vbo_edituv_strech_area_comp_glsl; + } + } + return nullptr; +} + +static const char *get_shader_name(int shader_type) +{ + switch (shader_type) { + case SHADER_BUFFER_LINES: { + return "subdiv lines build"; + } + case SHADER_BUFFER_LINES_LOOSE: { + return "subdiv lines loose build"; + } + case SHADER_BUFFER_LNOR: { + return "subdiv lnor build"; + } + case SHADER_BUFFER_EDGE_FAC: { + return "subdiv edge fac build"; + } + case SHADER_BUFFER_TRIS: + case SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS: { + return "subdiv tris"; + } + case SHADER_BUFFER_NORMALS_ACCUMULATE: { + return "subdiv normals accumulate"; + } + case SHADER_BUFFER_NORMALS_FINALIZE: { + return "subdiv normals finalize"; + } + case SHADER_PATCH_EVALUATION: { + return "subdiv patch evaluation"; + } + case SHADER_PATCH_EVALUATION_LIMIT_NORMALS: { + return "subdiv patch evaluation limit normals"; + } + case SHADER_PATCH_EVALUATION_FVAR: { + return "subdiv patch evaluation face-varying"; + } + case SHADER_PATCH_EVALUATION_FACE_DOTS: { + return "subdiv patch evaluation face dots"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_1D: { + return "subdiv custom data interp 1D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_2D: { + return "subdiv custom data interp 2D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_3D: { + return "subdiv custom data interp 3D"; + } + case SHADER_COMP_CUSTOM_DATA_INTERP_4D: { + return "subdiv custom data interp 4D"; + } + case SHADER_BUFFER_SCULPT_DATA: { + return "subdiv sculpt data"; + } + case SHADER_BUFFER_UV_STRETCH_ANGLE: { + return "subdiv uv stretch angle"; + } + case SHADER_BUFFER_UV_STRETCH_AREA: { + return "subdiv uv stretch area"; + } + } + return nullptr; 
+} + +static GPUShader *get_patch_evaluation_shader(int shader_type) +{ + if (g_subdiv_shaders[shader_type] == nullptr) { + const char *compute_code = get_shader_code(shader_type); + + const char *defines = nullptr; + if (shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define LIMIT_NORMALS\n"; + } + else if (shader_type == SHADER_PATCH_EVALUATION_FVAR) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define FVAR_EVALUATION\n"; + } + else if (shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n" + "#define FDOTS_EVALUATION\n"; + } + else { + defines = + "#define OSD_PATCH_BASIS_GLSL\n" + "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"; + } + + /* Merge OpenSubdiv library code with our own library code. */ + const char *patch_basis_source = openSubdiv_getGLSLPatchBasisSource(); + const char *subdiv_lib_code = datatoc_common_subdiv_lib_glsl; + char *library_code = static_cast<char *>( + MEM_mallocN(strlen(patch_basis_source) + strlen(subdiv_lib_code) + 1, + "subdiv patch evaluation library code")); + library_code[0] = '\0'; + strcat(library_code, patch_basis_source); + strcat(library_code, subdiv_lib_code); + + g_subdiv_shaders[shader_type] = GPU_shader_create_compute( + compute_code, library_code, defines, get_shader_name(shader_type)); + + MEM_freeN(library_code); + } + + return g_subdiv_shaders[shader_type]; +} + +static GPUShader *get_subdiv_shader(int shader_type, const char *defines) +{ + if (shader_type == SHADER_PATCH_EVALUATION || + shader_type == SHADER_PATCH_EVALUATION_LIMIT_NORMALS || + shader_type == SHADER_PATCH_EVALUATION_FVAR || + shader_type == SHADER_PATCH_EVALUATION_FACE_DOTS) { + return get_patch_evaluation_shader(shader_type); + } + if 
(g_subdiv_shaders[shader_type] == nullptr) { + const char *compute_code = get_shader_code(shader_type); + g_subdiv_shaders[shader_type] = GPU_shader_create_compute( + compute_code, datatoc_common_subdiv_lib_glsl, defines, get_shader_name(shader_type)); + } + return g_subdiv_shaders[shader_type]; +} + +/* -------------------------------------------------------------------- */ +/** Vertex formats used for data transfer from OpenSubdiv, and for data processing on our side. + * \{ */ + +static GPUVertFormat *get_uvs_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "uvs", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + } + return &format; +} + +/* Vertex format for `OpenSubdiv::Osd::PatchArray`. */ +static GPUVertFormat *get_patch_array_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "regDesc", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "desc", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "numPatches", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "indexBase", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "stride", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "primitiveIdBase", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format used for the `PatchTable::PatchHandle`. */ +static GPUVertFormat *get_patch_handle_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "vertex_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "array_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "patch_index", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format used for the quad-tree nodes of the PatchMap. 
*/ +static GPUVertFormat *get_quadtree_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "child", GPU_COMP_U32, 4, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format for `OpenSubdiv::Osd::PatchParam`, not really used, it is only for making sure + * that the #GPUVertBuf used to wrap the OpenSubdiv patch param buffer is valid. */ +static GPUVertFormat *get_patch_param_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + } + return &format; +} + +/* Vertex format for the patches' vertices index buffer. */ +static GPUVertFormat *get_patch_index_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_I32, 1, GPU_FETCH_INT); + } + return &format; +} + +/* Vertex format for the OpenSubdiv vertex buffer. */ +static GPUVertFormat *get_subdiv_vertex_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* We use 4 components for the vectors to account for padding in the compute shaders, where + * vec3 is promoted to vec4. */ + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + } + return &format; +} + +typedef struct CompressedPatchCoord { + int ptex_face_index; + /* UV coordinate encoded as u << 16 | v, where u and v are quantized on 16-bits. */ + unsigned int encoded_uv; +} CompressedPatchCoord; + +MINLINE CompressedPatchCoord make_patch_coord(int ptex_face_index, float u, float v) +{ + CompressedPatchCoord patch_coord = { + ptex_face_index, + (static_cast<unsigned int>(u * 65535.0f) << 16) | static_cast<unsigned int>(v * 65535.0f), + }; + return patch_coord; +} + +/* Vertex format used for the #CompressedPatchCoord. 
*/ +static GPUVertFormat *get_blender_patch_coords_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* WARNING! Adjust #CompressedPatchCoord accordingly. */ + GPU_vertformat_attr_add(&format, "ptex_face_index", GPU_COMP_U32, 1, GPU_FETCH_INT); + GPU_vertformat_attr_add(&format, "uv", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + return &format; +} + +static GPUVertFormat *get_origindex_format(void) +{ + static GPUVertFormat format; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "color", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + return &format; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Utilities to initialize a OpenSubdiv_Buffer for a GPUVertBuf. + * \{ */ + +static void vertbuf_bind_gpu(const OpenSubdiv_Buffer *buffer) +{ + GPUVertBuf *verts = (GPUVertBuf *)(buffer->data); + GPU_vertbuf_use(verts); +} + +static void *vertbuf_alloc(const OpenSubdiv_Buffer *interface, const uint len) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_data_alloc(verts, len); + return GPU_vertbuf_get_data(verts); +} + +static void vertbuf_device_alloc(const OpenSubdiv_Buffer *interface, const uint len) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + /* This assumes that GPU_USAGE_DEVICE_ONLY was used, which won't allocate host memory. 
*/ + // BLI_assert(GPU_vertbuf_get_usage(verts) == GPU_USAGE_DEVICE_ONLY); + GPU_vertbuf_data_alloc(verts, len); +} + +static void vertbuf_wrap_device_handle(const OpenSubdiv_Buffer *interface, uint64_t handle) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_wrap_handle(verts, handle); +} + +static void vertbuf_update_data(const OpenSubdiv_Buffer *interface, + uint start, + uint len, + const void *data) +{ + GPUVertBuf *verts = (GPUVertBuf *)(interface->data); + GPU_vertbuf_update_sub(verts, start, len, data); +} + +static void opensubdiv_gpu_buffer_init(OpenSubdiv_Buffer *buffer_interface, GPUVertBuf *vertbuf) +{ + buffer_interface->data = vertbuf; + buffer_interface->bind_gpu = vertbuf_bind_gpu; + buffer_interface->buffer_offset = 0; + buffer_interface->wrap_device_handle = vertbuf_wrap_device_handle; + buffer_interface->alloc = vertbuf_alloc; + buffer_interface->device_alloc = vertbuf_device_alloc; + buffer_interface->device_update = vertbuf_update_data; +} + +static GPUVertBuf *create_buffer_and_interface(OpenSubdiv_Buffer *interface, GPUVertFormat *format) +{ + GPUVertBuf *buffer = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(buffer, format, GPU_USAGE_DEVICE_ONLY); + opensubdiv_gpu_buffer_init(interface, buffer); + return buffer; +} + +/** \} */ + +// -------------------------------------------------------- + +static uint tris_count_from_number_of_loops(const uint number_of_loops) +{ + const uint32_t number_of_quads = number_of_loops / 4; + return number_of_quads * 2; +} + +/* -------------------------------------------------------------------- */ +/** \name Utilities to build a GPUVertBuf from an origindex buffer. 
+ * \{ */ + +void draw_subdiv_init_origindex_buffer(GPUVertBuf *buffer, + int *vert_origindex, + uint num_loops, + uint loose_len) +{ + GPU_vertbuf_init_with_format_ex(buffer, get_origindex_format(), GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(buffer, num_loops + loose_len); + + int *vbo_data = (int *)GPU_vertbuf_get_data(buffer); + memcpy(vbo_data, vert_origindex, num_loops * sizeof(int)); +} + +GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops) +{ + GPUVertBuf *buffer = GPU_vertbuf_calloc(); + draw_subdiv_init_origindex_buffer(buffer, vert_origindex, num_loops, 0); + return buffer; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Utilities for DRWPatchMap. + * \{ */ + +static void draw_patch_map_build(DRWPatchMap *gpu_patch_map, Subdiv *subdiv) +{ + GPUVertBuf *patch_map_handles = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(patch_map_handles, get_patch_handle_format(), GPU_USAGE_STATIC); + + GPUVertBuf *patch_map_quadtree = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex(patch_map_quadtree, get_quadtree_format(), GPU_USAGE_STATIC); + + OpenSubdiv_Buffer patch_map_handles_interface; + opensubdiv_gpu_buffer_init(&patch_map_handles_interface, patch_map_handles); + + OpenSubdiv_Buffer patch_map_quad_tree_interface; + opensubdiv_gpu_buffer_init(&patch_map_quad_tree_interface, patch_map_quadtree); + + int min_patch_face = 0; + int max_patch_face = 0; + int max_depth = 0; + int patches_are_triangular = 0; + + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + evaluator->getPatchMap(evaluator, + &patch_map_handles_interface, + &patch_map_quad_tree_interface, + &min_patch_face, + &max_patch_face, + &max_depth, + &patches_are_triangular); + + gpu_patch_map->patch_map_handles = patch_map_handles; + gpu_patch_map->patch_map_quadtree = patch_map_quadtree; + gpu_patch_map->min_patch_face = min_patch_face; + gpu_patch_map->max_patch_face = max_patch_face; + 
gpu_patch_map->max_depth = max_depth; + gpu_patch_map->patches_are_triangular = patches_are_triangular; +} + +static void draw_patch_map_free(DRWPatchMap *gpu_patch_map) +{ + GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_handles); + GPU_VERTBUF_DISCARD_SAFE(gpu_patch_map->patch_map_quadtree); + gpu_patch_map->min_patch_face = 0; + gpu_patch_map->max_patch_face = 0; + gpu_patch_map->max_depth = 0; + gpu_patch_map->patches_are_triangular = 0; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivCache + * \{ */ + +static void draw_subdiv_cache_free_material_data(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->polygon_mat_offset); + MEM_SAFE_FREE(cache->mat_start); + MEM_SAFE_FREE(cache->mat_end); +} + +static void draw_subdiv_free_edit_mode_cache(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->verts_orig_index); + GPU_VERTBUF_DISCARD_SAFE(cache->edges_orig_index); + GPU_VERTBUF_DISCARD_SAFE(cache->fdots_patch_coords); +} + +void draw_subdiv_cache_free(DRWSubdivCache *cache) +{ + GPU_VERTBUF_DISCARD_SAFE(cache->patch_coords); + GPU_VERTBUF_DISCARD_SAFE(cache->face_ptex_offset_buffer); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_polygon_offset_buffer); + GPU_VERTBUF_DISCARD_SAFE(cache->extra_coarse_face_data); + MEM_SAFE_FREE(cache->subdiv_loop_subdiv_vert_index); + MEM_SAFE_FREE(cache->subdiv_loop_poly_index); + MEM_SAFE_FREE(cache->point_indices); + MEM_SAFE_FREE(cache->subdiv_polygon_offset); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency_offsets); + GPU_VERTBUF_DISCARD_SAFE(cache->subdiv_vertex_face_adjacency); + cache->resolution = 0; + cache->num_subdiv_loops = 0; + cache->num_coarse_poly = 0; + cache->num_subdiv_quads = 0; + draw_subdiv_free_edit_mode_cache(cache); + draw_subdiv_cache_free_material_data(cache); + draw_patch_map_free(&cache->gpu_patch_map); + if (cache->ubo) { + GPU_uniformbuf_free(cache->ubo); + cache->ubo = nullptr; + } +} + +/* Flags used in 
#DRWSubdivCache.extra_coarse_face_data. The flags are packed in the upper bits of + * each uint (one per coarse face), #SUBDIV_COARSE_FACE_FLAG_OFFSET tells where they are in the + * packed bits. */ +#define SUBDIV_COARSE_FACE_FLAG_SMOOTH 1u +#define SUBDIV_COARSE_FACE_FLAG_SELECT 2u +#define SUBDIV_COARSE_FACE_FLAG_ACTIVE 4u + +#define SUBDIV_COARSE_FACE_FLAG_OFFSET 29u + +#define SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK \ + (SUBDIV_COARSE_FACE_FLAG_SMOOTH << SUBDIV_COARSE_FACE_FLAG_OFFSET) +#define SUBDIV_COARSE_FACE_FLAG_SELECT_MASK \ + (SUBDIV_COARSE_FACE_FLAG_SELECT << SUBDIV_COARSE_FACE_FLAG_OFFSET) +#define SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK \ + (SUBDIV_COARSE_FACE_FLAG_ACTIVE << SUBDIV_COARSE_FACE_FLAG_OFFSET) + +#define SUBDIV_COARSE_FACE_LOOP_START_MASK \ + ~((SUBDIV_COARSE_FACE_FLAG_SMOOTH | SUBDIV_COARSE_FACE_FLAG_SELECT | \ + SUBDIV_COARSE_FACE_FLAG_ACTIVE) \ + << SUBDIV_COARSE_FACE_FLAG_OFFSET) + +static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cache, Mesh *mesh) +{ + if (cache->extra_coarse_face_data == nullptr) { + cache->extra_coarse_face_data = GPU_vertbuf_calloc(); + static GPUVertFormat format; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "data", GPU_COMP_U32, 1, GPU_FETCH_INT); + } + GPU_vertbuf_init_with_format_ex(cache->extra_coarse_face_data, &format, GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->extra_coarse_face_data, mesh->totpoly); + } + + uint32_t *flags_data = (uint32_t *)(GPU_vertbuf_get_data(cache->extra_coarse_face_data)); + + if (cache->bm) { + BMesh *bm = cache->bm; + BMFace *f; + BMIter iter; + + /* Ensure all current elements follow new customdata layout. 
*/ + BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) { + const int index = BM_elem_index_get(f); + uint32_t flag = 0; + if (BM_elem_flag_test(f, BM_ELEM_SMOOTH)) { + flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH; + } + if (BM_elem_flag_test(f, BM_ELEM_SELECT)) { + flag |= SUBDIV_COARSE_FACE_FLAG_SELECT; + } + if (f == bm->act_face) { + flag |= SUBDIV_COARSE_FACE_FLAG_ACTIVE; + } + const int loopstart = BM_elem_index_get(f->l_first); + flags_data[index] = (uint)(loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + } + } + else { + for (int i = 0; i < mesh->totpoly; i++) { + uint32_t flag = 0; + if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) { + flag = SUBDIV_COARSE_FACE_FLAG_SMOOTH; + } + flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + } + } + + /* Make sure updated data is re-uploaded. */ + GPU_vertbuf_tag_dirty(cache->extra_coarse_face_data); +} + +static DRWSubdivCache *mesh_batch_cache_ensure_subdiv_cache(MeshBatchCache *mbc) +{ + DRWSubdivCache *subdiv_cache = mbc->subdiv_cache; + if (subdiv_cache == nullptr) { + subdiv_cache = static_cast<DRWSubdivCache *>( + MEM_callocN(sizeof(DRWSubdivCache), "DRWSubdivCache")); + } + mbc->subdiv_cache = subdiv_cache; + return subdiv_cache; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Subdivision grid traversal. + * + * Traverse the uniform subdivision grid over coarse faces and gather useful information for + * building the draw buffers on the GPU. We primarily gather the patch coordinates for all + * subdivision faces, as well as the original coarse indices for each subdivision element (vertex, + * face, or edge) which directly maps to its coarse counterpart (note that all subdivision faces + * map to a coarse face). This information will then be cached in #DRWSubdivCache for subsequent + * reevaluations, as long as the topology does not change. 
+ * \{ */ + +typedef struct DRWCacheBuildingContext { + const Mesh *coarse_mesh; + const SubdivToMeshSettings *settings; + + DRWSubdivCache *cache; + + /* Pointers into DRWSubdivCache buffers for easier access during traversal. */ + CompressedPatchCoord *patch_coords; + int *subdiv_loop_vert_index; + int *subdiv_loop_subdiv_vert_index; + int *subdiv_loop_edge_index; + int *subdiv_loop_poly_index; + int *point_indices; + + /* Temporary buffers used during traversal. */ + int *vert_origindex_map; + int *edge_origindex_map; + + /* Origindex layers from the mesh to directly look up during traversal the origindex from the + * base mesh for edit data so that we do not have to handle yet another GPU buffer and do this in + * the shaders. */ + int *v_origindex; + int *e_origindex; +} DRWCacheBuildingContext; + +static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context, + const int num_vertices, + const int num_edges, + const int num_loops, + const int num_polygons, + const int *subdiv_polygon_offset) +{ + if (num_loops == 0) { + return false; + } + + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + DRWSubdivCache *cache = ctx->cache; + + /* Set topology information. */ + cache->num_subdiv_edges = (uint)num_edges; + cache->num_subdiv_loops = (uint)num_loops; + cache->num_subdiv_verts = (uint)num_vertices; + cache->num_subdiv_quads = (uint)num_polygons; + cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset)); + + /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after + * it was sent to the device, since we may use the data while building other buffers on the CPU + * side. 
*/ + cache->patch_coords = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->patch_coords, get_blender_patch_coords_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->patch_coords, cache->num_subdiv_loops); + + cache->verts_orig_index = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->verts_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->verts_orig_index, cache->num_subdiv_loops); + + cache->edges_orig_index = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format_ex( + cache->edges_orig_index, get_origindex_format(), GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(cache->edges_orig_index, cache->num_subdiv_loops); + + cache->subdiv_loop_subdiv_vert_index = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_subdiv_vert_index")); + + cache->subdiv_loop_poly_index = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_loops * sizeof(int), "subdiv_loop_poly_index")); + + cache->point_indices = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "point_indices")); + for (int i = 0; i < num_vertices; i++) { + cache->point_indices[i] = -1; + } + + /* Initialize context pointers and temporary buffers. 
*/ + ctx->patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data(cache->patch_coords); + ctx->subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(cache->verts_orig_index); + ctx->subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(cache->edges_orig_index); + ctx->subdiv_loop_subdiv_vert_index = cache->subdiv_loop_subdiv_vert_index; + ctx->subdiv_loop_poly_index = cache->subdiv_loop_poly_index; + ctx->point_indices = cache->point_indices; + + ctx->v_origindex = static_cast<int *>( + CustomData_get_layer(&ctx->coarse_mesh->vdata, CD_ORIGINDEX)); + + ctx->e_origindex = static_cast<int *>( + CustomData_get_layer(&ctx->coarse_mesh->edata, CD_ORIGINDEX)); + + ctx->vert_origindex_map = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map")); + for (int i = 0; i < num_vertices; i++) { + ctx->vert_origindex_map[i] = -1; + } + + ctx->edge_origindex_map = static_cast<int *>( + MEM_mallocN(cache->num_subdiv_edges * sizeof(int), "subdiv_edge_origindex_map")); + for (int i = 0; i < num_edges; i++) { + ctx->edge_origindex_map[i] = -1; + } + + return true; +} + +static void draw_subdiv_vertex_corner_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls), + const int UNUSED(ptex_face_index), + const float UNUSED(u), + const float UNUSED(v), + const int coarse_vertex_index, + const int UNUSED(coarse_poly_index), + const int UNUSED(coarse_corner), + const int subdiv_vertex_index) +{ + BLI_assert(coarse_vertex_index != ORIGINDEX_NONE); + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + ctx->vert_origindex_map[subdiv_vertex_index] = coarse_vertex_index; +} + +static void draw_subdiv_vertex_edge_cb(const SubdivForeachContext *UNUSED(foreach_context), + void *UNUSED(tls_v), + const int UNUSED(ptex_face_index), + const float UNUSED(u), + const float UNUSED(v), + const int UNUSED(coarse_edge_index), + const int UNUSED(coarse_poly_index), + const int UNUSED(coarse_corner), + const int 
UNUSED(subdiv_vertex_index)) +{ + /* Required if SubdivForeachContext.vertex_corner is also set. */ +} + +static void draw_subdiv_edge_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls), + const int coarse_edge_index, + const int subdiv_edge_index, + const int UNUSED(subdiv_v1), + const int UNUSED(subdiv_v2)) +{ + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + + int coarse_index = coarse_edge_index; + + if (coarse_index != -1) { + if (ctx->e_origindex) { + coarse_index = ctx->e_origindex[coarse_index]; + } + } + + ctx->edge_origindex_map[subdiv_edge_index] = coarse_index; +} + +static void draw_subdiv_loop_cb(const SubdivForeachContext *foreach_context, + void *UNUSED(tls_v), + const int ptex_face_index, + const float u, + const float v, + const int UNUSED(coarse_loop_index), + const int coarse_poly_index, + const int UNUSED(coarse_corner), + const int subdiv_loop_index, + const int subdiv_vertex_index, + const int subdiv_edge_index) +{ + DRWCacheBuildingContext *ctx = (DRWCacheBuildingContext *)(foreach_context->user_data); + ctx->patch_coords[subdiv_loop_index] = make_patch_coord(ptex_face_index, u, v); + + int coarse_vertex_index = ctx->vert_origindex_map[subdiv_vertex_index]; + + if (coarse_vertex_index != -1) { + if (ctx->v_origindex) { + coarse_vertex_index = ctx->v_origindex[coarse_vertex_index]; + } + + /* Double check as vorigindex may have modified the index. */ + if (coarse_vertex_index != -1) { + ctx->point_indices[coarse_vertex_index] = subdiv_loop_index; + } + } + + ctx->subdiv_loop_subdiv_vert_index[subdiv_loop_index] = subdiv_vertex_index; + /* For now index the subdiv_edge_index, it will be replaced by the actual coarse edge index + * at the end of the traversal as some edges are only then traversed. 
*/ + ctx->subdiv_loop_edge_index[subdiv_loop_index] = subdiv_edge_index; + ctx->subdiv_loop_poly_index[subdiv_loop_index] = coarse_poly_index; + ctx->subdiv_loop_vert_index[subdiv_loop_index] = coarse_vertex_index; +} + +static void draw_subdiv_foreach_callbacks(SubdivForeachContext *foreach_context) +{ + memset(foreach_context, 0, sizeof(*foreach_context)); + foreach_context->topology_info = draw_subdiv_topology_info_cb; + foreach_context->loop = draw_subdiv_loop_cb; + foreach_context->edge = draw_subdiv_edge_cb; + foreach_context->vertex_corner = draw_subdiv_vertex_corner_cb; + foreach_context->vertex_edge = draw_subdiv_vertex_edge_cb; +} + +static void do_subdiv_traversal(DRWCacheBuildingContext *cache_building_context, Subdiv *subdiv) +{ + SubdivForeachContext foreach_context; + draw_subdiv_foreach_callbacks(&foreach_context); + foreach_context.user_data = cache_building_context; + + BKE_subdiv_foreach_subdiv_geometry(subdiv, + &foreach_context, + cache_building_context->settings, + cache_building_context->coarse_mesh); + + /* Now that traversal is done, we can set up the right original indices for the loop-to-edge map. + */ + for (int i = 0; i < cache_building_context->cache->num_subdiv_loops; i++) { + cache_building_context->subdiv_loop_edge_index[i] = + cache_building_context + ->edge_origindex_map[cache_building_context->subdiv_loop_edge_index[i]]; + } +} + +static GPUVertBuf *gpu_vertbuf_create_from_format(GPUVertFormat *format, uint len) +{ + GPUVertBuf *verts = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(verts, format); + GPU_vertbuf_data_alloc(verts, len); + return verts; +} + +/* Build maps to hold enough information to tell which face is adjacent to which vertex; those will + * be used for computing normals if limit surfaces are unavailable. 
*/ +static void build_vertex_face_adjacency_maps(DRWSubdivCache *cache) +{ + /* +1 so that we do not require a special case for the last vertex, this extra offset will + * contain the total number of adjacent faces. */ + cache->subdiv_vertex_face_adjacency_offsets = gpu_vertbuf_create_from_format( + get_origindex_format(), cache->num_subdiv_verts + 1); + + int *vertex_offsets = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency_offsets); + memset(vertex_offsets, 0, sizeof(int) * cache->num_subdiv_verts + 1); + + for (int i = 0; i < cache->num_subdiv_loops; i++) { + vertex_offsets[cache->subdiv_loop_subdiv_vert_index[i]]++; + } + + int ofs = vertex_offsets[0]; + vertex_offsets[0] = 0; + for (uint i = 1; i < cache->num_subdiv_verts + 1; i++) { + int tmp = vertex_offsets[i]; + vertex_offsets[i] = ofs; + ofs += tmp; + } + + cache->subdiv_vertex_face_adjacency = gpu_vertbuf_create_from_format(get_origindex_format(), + cache->num_subdiv_loops); + int *adjacent_faces = (int *)GPU_vertbuf_get_data(cache->subdiv_vertex_face_adjacency); + int *tmp_set_faces = static_cast<int *>( + MEM_callocN(sizeof(int) * cache->num_subdiv_verts, "tmp subdiv vertex offset")); + + for (int i = 0; i < cache->num_subdiv_loops / 4; i++) { + for (int j = 0; j < 4; j++) { + const int subdiv_vertex = cache->subdiv_loop_subdiv_vert_index[i * 4 + j]; + int first_face_offset = vertex_offsets[subdiv_vertex] + tmp_set_faces[subdiv_vertex]; + adjacent_faces[first_face_offset] = i; + tmp_set_faces[subdiv_vertex] += 1; + } + } + + MEM_freeN(tmp_set_faces); +} + +static bool draw_subdiv_build_cache(DRWSubdivCache *cache, + Subdiv *subdiv, + Mesh *mesh_eval, + const Scene *scene, + const SubsurfModifierData *smd, + const bool is_final_render) +{ + const int level = get_render_subsurf_level(&scene->r, smd->levels, is_final_render); + SubdivToMeshSettings to_mesh_settings; + to_mesh_settings.resolution = (1 << level) + 1; + to_mesh_settings.use_optimal_display = false; + + if (cache->resolution 
!= to_mesh_settings.resolution) { + /* Resolution changed, we need to rebuild, free any existing cached data. */ + draw_subdiv_cache_free(cache); + } + + /* If the resolution between the cache and the settings match for some reason, check if the patch + * coordinates were not already generated. Those coordinates are specific to the resolution, so + * they should be null either after initialization, or after freeing if the resolution (or some + * other subdivision setting) changed. + */ + if (cache->patch_coords != nullptr) { + return true; + } + + DRWCacheBuildingContext cache_building_context; + cache_building_context.coarse_mesh = mesh_eval; + cache_building_context.settings = &to_mesh_settings; + cache_building_context.cache = cache; + + do_subdiv_traversal(&cache_building_context, subdiv); + if (cache->num_subdiv_loops == 0) { + /* Either the traversal failed, or we have an empty mesh, either way we cannot go any further. + * The subdiv_polygon_offset cannot then be reliably stored in the cache, so free it directly. + */ + MEM_SAFE_FREE(cache->subdiv_polygon_offset); + return false; + } + + /* Build buffers for the PatchMap. */ + draw_patch_map_build(&cache->gpu_patch_map, subdiv); + + cache->face_ptex_offset = BKE_subdiv_face_ptex_offset_get(subdiv); + + // Build patch coordinates for all the face dots + cache->fdots_patch_coords = gpu_vertbuf_create_from_format(get_blender_patch_coords_format(), + mesh_eval->totpoly); + CompressedPatchCoord *blender_fdots_patch_coords = (CompressedPatchCoord *)GPU_vertbuf_get_data( + cache->fdots_patch_coords); + for (int i = 0; i < mesh_eval->totpoly; i++) { + const int ptex_face_index = cache->face_ptex_offset[i]; + if (mesh_eval->mpoly[i].totloop == 4) { + /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. 
*/ + blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f); + } + else { + /* For N-gons, since they are split into quads from the center, and since the center is + * chosen to be the top right corner of each quad, the center coordinate of the coarse face + * is any one of those top right corners with `u = v = 1.0`. */ + blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 1.0f, 1.0f); + } + } + + cache->resolution = to_mesh_settings.resolution; + + cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer( + cache->subdiv_polygon_offset, mesh_eval->totpoly); + + cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset, + mesh_eval->totpoly + 1); + cache->num_coarse_poly = mesh_eval->totpoly; + cache->point_indices = cache_building_context.point_indices; + + build_vertex_face_adjacency_maps(cache); + + /* Cleanup. */ + MEM_freeN(cache_building_context.vert_origindex_map); + MEM_freeN(cache_building_context.edge_origindex_map); + + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivUboStorage. + * + * Common uniforms for the various shaders. + * \{ */ + +typedef struct DRWSubdivUboStorage { + /* Offsets in the buffers data where the source and destination data start. */ + int src_offset; + int dst_offset; + + /* Parameters for the DRWPatchMap. */ + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; + + /* Coarse topology information. */ + int coarse_poly_count; + uint edge_loose_offset; + + /* Refined topology information. */ + uint num_subdiv_loops; + + /* Subdivision settings, is int in C but bool in the GLSL code, as there, bools have the same + * size as ints, so we should use int in C to ensure that the size of the structure is what GLSL + * expects. */ + int optimal_display; + + /* The sculpt mask data layer may be null. 
*/ + int has_sculpt_mask; + + /* Masks for the extra coarse face data. */ + uint coarse_face_select_mask; + uint coarse_face_smooth_mask; + uint coarse_face_active_mask; + uint coarse_face_loopstart_mask; + + /* Number of elements to process in the compute shader (can be the coarse quad count, or the + * final vertex count, depending on which compute pass we do). This is used to early out in case + * of out of bond accesses as compute dispatch are of fixed size. */ + uint total_dispatch_size; +} DRWSubdivUboStorage; + +static_assert((sizeof(DRWSubdivUboStorage) % 16) == 0, + "DRWSubdivUboStorage is not padded to a multiple of the size of vec4"); + +static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache, + DRWSubdivUboStorage *ubo, + const int src_offset, + const int dst_offset, + const uint total_dispatch_size, + const bool has_sculpt_mask) +{ + ubo->src_offset = src_offset; + ubo->dst_offset = dst_offset; + ubo->min_patch_face = cache->gpu_patch_map.min_patch_face; + ubo->max_patch_face = cache->gpu_patch_map.max_patch_face; + ubo->max_depth = cache->gpu_patch_map.max_depth; + ubo->patches_are_triangular = cache->gpu_patch_map.patches_are_triangular; + ubo->coarse_poly_count = cache->num_coarse_poly; + ubo->optimal_display = cache->optimal_display; + ubo->num_subdiv_loops = cache->num_subdiv_loops; + ubo->edge_loose_offset = cache->num_subdiv_loops * 2; + ubo->has_sculpt_mask = has_sculpt_mask; + ubo->coarse_face_smooth_mask = SUBDIV_COARSE_FACE_FLAG_SMOOTH_MASK; + ubo->coarse_face_select_mask = SUBDIV_COARSE_FACE_FLAG_SELECT_MASK; + ubo->coarse_face_active_mask = SUBDIV_COARSE_FACE_FLAG_ACTIVE_MASK; + ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK; + ubo->total_dispatch_size = total_dispatch_size; +} + +static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache, + GPUShader *shader, + const int src_offset, + const int dst_offset, + const uint total_dispatch_size, + const bool has_sculpt_mask = false) +{ + 
DRWSubdivUboStorage storage; + draw_subdiv_init_ubo_storage( + cache, &storage, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask); + + if (!cache->ubo) { + const_cast<DRWSubdivCache *>(cache)->ubo = GPU_uniformbuf_create_ex( + sizeof(DRWSubdivUboStorage), &storage, "DRWSubdivUboStorage"); + } + + GPU_uniformbuf_update(cache->ubo, &storage); + + const int location = GPU_shader_get_uniform_block(shader, "shader_data"); + GPU_uniformbuf_bind(cache->ubo, location); +} + +/** \} */ + +// -------------------------------------------------------- + +#define SUBDIV_LOCAL_WORK_GROUP_SIZE 64 +static uint get_dispatch_size(uint elements) +{ + return divide_ceil_u(elements, SUBDIV_LOCAL_WORK_GROUP_SIZE); +} + +/* Helper to ensure that the UBO is always initalized before dispatching computes and that the same + * number of elements that need to be processed is used for the UBO and the dispatch size. + * Use this instead of a raw call to #GPU_compute_dispatch. */ +static void drw_subdiv_compute_dispatch(const DRWSubdivCache *cache, + GPUShader *shader, + const int src_offset, + const int dst_offset, + uint total_dispatch_size, + const bool has_sculpt_mask = false) +{ + const uint max_res_x = static_cast<uint>(GPU_max_work_group_count(0)); + + const uint dispatch_size = get_dispatch_size(total_dispatch_size); + uint dispatch_rx = dispatch_size; + uint dispatch_ry = 1u; + if (dispatch_rx > max_res_x) { + /* Since there are some limitations with regards to the maximum work group size (could be as + * low as 64k elements per call), we split the number elements into a "2d" number, with the + * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum + * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements + * total, which should be enough. If not, we could also use the 3rd dimension. 
*/ + /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and + * get the smallest rect fitting the requirements. */ + dispatch_rx = dispatch_ry = ceilf(sqrtf(dispatch_size)); + /* Avoid a completely empty dispatch line caused by rounding. */ + if ((dispatch_rx * (dispatch_ry - 1)) >= dispatch_size) { + dispatch_ry -= 1; + } + } + + /* X and Y dimensions may have different limits so the above computation may not be right, but + * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore, + * we presume it all fits. */ + BLI_assert(dispatch_ry < static_cast<uint>(GPU_max_work_group_count(1))); + + draw_subdiv_ubo_update_and_bind( + cache, shader, src_offset, dst_offset, total_dispatch_size, has_sculpt_mask); + + GPU_compute_dispatch(shader, dispatch_rx, dispatch_ry, 1); +} + +void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + const bool do_limit_normals) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, + get_subdiv_vertex_format()); + evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + 
evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader( + do_limit_normals ? SHADER_PATCH_EVALUATION_LIMIT_NORMALS : SHADER_PATCH_EVALUATION); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(pos_nor, 8); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_extract_uvs(const DRWSubdivCache *cache, + GPUVertBuf *uvs, + const int face_varying_channel, + const int dst_offset) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, get_uvs_format()); + evaluator->wrapFVarSrcBuffer(evaluator, face_varying_channel, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + evaluator->fillFVarPatchArraysBuffer( + evaluator, face_varying_channel, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapFVarPatchIndexBuffer( + evaluator, face_varying_channel, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + evaluator->wrapFVarPatchParamBuffer( + evaluator, face_varying_channel, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FVAR); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + 
GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(uvs, 8); + + /* The buffer offset has the stride baked in (which is 2 as we have UVs) so remove the stride by + * dividing by 2 */ + const int src_offset = src_buffer_interface.buffer_offset / 2; + drw_subdiv_compute_dispatch(cache, shader, src_offset, dst_offset, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. + * Since it may also be used for computing UV stretches, we also need a barrier on the shader + * storage. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_SHADER_STORAGE); + + /* Cleanup. */ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, + GPUVertBuf *src_data, + GPUVertBuf *dst_data, + int dimensions, + int dst_offset) +{ + GPUShader *shader = nullptr; + + if (dimensions == 1) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 1\n"); + } + else if (dimensions == 2) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_2D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 2\n"); + } + else if (dimensions == 3) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_3D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 3\n"); + } + else if (dimensions == 4) { + shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_4D, + "#define SUBDIV_POLYGON_OFFSET\n" + "#define DIMENSIONS 4\n" + "#define GPU_FETCH_U16_TO_FLOAT\n"); + } + else { + /* Crash if dimensions are not supported. */ + } + + GPU_shader_bind(shader); + + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. 
*/ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + GPU_vertbuf_bind_as_ssbo(src_data, 1); + GPU_vertbuf_bind_as_ssbo(cache->face_ptex_offset_buffer, 2); + GPU_vertbuf_bind_as_ssbo(cache->patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 4); + GPU_vertbuf_bind_as_ssbo(dst_data, 5); + + drw_subdiv_compute_dispatch(cache, shader, 0, dst_offset, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache, + GPUVertBuf *mask_vbo, + GPUVertBuf *face_set_vbo, + GPUVertBuf *sculpt_data) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_SCULPT_DATA, nullptr); + GPU_shader_bind(shader); + + if (mask_vbo) { + GPU_vertbuf_bind_as_ssbo(mask_vbo, 0); + } + + GPU_vertbuf_bind_as_ssbo(face_set_vbo, 1); + GPU_vertbuf_bind_as_ssbo(sculpt_data, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads, mask_vbo != nullptr); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *face_adjacency_offsets, + GPUVertBuf *face_adjacency_lists, + GPUVertBuf *vertex_normals) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_ACCUMULATE, nullptr); + GPU_shader_bind(shader); + + int binding_point = 0; + + GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++); + GPU_vertbuf_bind_as_ssbo(face_adjacency_offsets, binding_point++); + GPU_vertbuf_bind_as_ssbo(face_adjacency_lists, binding_point++); + GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_verts); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_finalize_normals(const DRWSubdivCache *cache, + GPUVertBuf *vertex_normals, + GPUVertBuf *subdiv_loop_subdiv_vert_index, + GPUVertBuf *pos_nor) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_NORMALS_FINALIZE, nullptr); + GPU_shader_bind(shader); + + int binding_point = 0; + GPU_vertbuf_bind_as_ssbo(vertex_normals, binding_point++); + GPU_vertbuf_bind_as_ssbo(subdiv_loop_subdiv_vert_index, binding_point++); + GPU_vertbuf_bind_as_ssbo(pos_nor, binding_point++); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attrib array. We + * also need it for subsequent compute shaders, so a barrier on the shader storage is also + * needed. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache, + GPUIndexBuf *subdiv_tris, + const int material_count) +{ + const bool do_single_material = material_count <= 1; + + const char *defines = "#define SUBDIV_POLYGON_OFFSET\n"; + if (do_single_material) { + defines = + "#define SUBDIV_POLYGON_OFFSET\n" + "#define SINGLE_MATERIAL\n"; + } + + GPUShader *shader = get_subdiv_shader( + do_single_material ? SHADER_BUFFER_TRIS : SHADER_BUFFER_TRIS_MULTIPLE_MATERIALS, defines); + GPU_shader_bind(shader); + + /* Outputs */ + GPU_indexbuf_bind_as_ssbo(subdiv_tris, 1); + + if (!do_single_material) { + GPU_vertbuf_bind_as_ssbo(cache->polygon_mat_offset, 2); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + } + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache, + GPUVertBuf *fdots_pos, + GPUVertBuf *fdots_nor, + GPUIndexBuf *fdots_indices) +{ + Subdiv *subdiv = cache->subdiv; + OpenSubdiv_Evaluator *evaluator = subdiv->evaluator; + + OpenSubdiv_Buffer src_buffer_interface; + GPUVertBuf *src_buffer = create_buffer_and_interface(&src_buffer_interface, + get_subdiv_vertex_format()); + evaluator->wrapSrcBuffer(evaluator, &src_buffer_interface); + + OpenSubdiv_Buffer patch_arrays_buffer_interface; + GPUVertBuf *patch_arrays_buffer = create_buffer_and_interface(&patch_arrays_buffer_interface, + get_patch_array_format()); + opensubdiv_gpu_buffer_init(&patch_arrays_buffer_interface, patch_arrays_buffer); + evaluator->fillPatchArraysBuffer(evaluator, &patch_arrays_buffer_interface); + + OpenSubdiv_Buffer patch_index_buffer_interface; + GPUVertBuf *patch_index_buffer = create_buffer_and_interface(&patch_index_buffer_interface, + get_patch_index_format()); + evaluator->wrapPatchIndexBuffer(evaluator, &patch_index_buffer_interface); + + OpenSubdiv_Buffer patch_param_buffer_interface; + GPUVertBuf *patch_param_buffer = create_buffer_and_interface(&patch_param_buffer_interface, + get_patch_param_format()); + evaluator->wrapPatchParamBuffer(evaluator, &patch_param_buffer_interface); + + GPUShader *shader = get_patch_evaluation_shader(SHADER_PATCH_EVALUATION_FACE_DOTS); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(src_buffer, 0); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_handles, 1); + GPU_vertbuf_bind_as_ssbo(cache->gpu_patch_map.patch_map_quadtree, 2); + GPU_vertbuf_bind_as_ssbo(cache->fdots_patch_coords, 3); + GPU_vertbuf_bind_as_ssbo(cache->verts_orig_index, 4); + GPU_vertbuf_bind_as_ssbo(patch_arrays_buffer, 5); + GPU_vertbuf_bind_as_ssbo(patch_index_buffer, 6); + GPU_vertbuf_bind_as_ssbo(patch_param_buffer, 7); + GPU_vertbuf_bind_as_ssbo(fdots_pos, 8); + GPU_vertbuf_bind_as_ssbo(fdots_nor, 9); + 
GPU_indexbuf_bind_as_ssbo(fdots_indices, 10); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 11); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_coarse_poly); + + /* This generates two vertex buffers and an index buffer, so we need to put a barrier on the + * vertex attributes and element arrays. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY | GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); + + GPU_vertbuf_discard(patch_index_buffer); + GPU_vertbuf_discard(patch_param_buffer); + GPU_vertbuf_discard(patch_arrays_buffer); + GPU_vertbuf_discard(src_buffer); +} + +void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, GPUIndexBuf *lines_indices) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES, nullptr); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(cache->edges_orig_index, 0); + GPU_indexbuf_bind_as_ssbo(lines_indices, 1); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache, + GPUIndexBuf *lines_indices, + uint num_loose_edges) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LINES_LOOSE, "#define LINES_LOOSE\n"); + GPU_shader_bind(shader); + + GPU_indexbuf_bind_as_ssbo(lines_indices, 1); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, num_loose_edges); + + /* This generates an index buffer, so we need to put a barrier on the element array. */ + GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *edge_idx, + GPUVertBuf *edge_fac) +{ + /* No separate shader for the AMD driver case as we assume that the GPU will not change during + * the execution of the program. */ + const char *defines = GPU_crappy_amd_driver() ? "#define GPU_AMD_DRIVER_BYTE_BUG\n" : nullptr; + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_EDGE_FAC, defines); + GPU_shader_bind(shader); + + GPU_vertbuf_bind_as_ssbo(pos_nor, 0); + GPU_vertbuf_bind_as_ssbo(edge_idx, 1); + GPU_vertbuf_bind_as_ssbo(edge_fac, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *lnor) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_LNOR, "#define SUBDIV_POLYGON_OFFSET\n"); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(pos_nor, 1); + GPU_vertbuf_bind_as_ssbo(cache->extra_coarse_face_data, 2); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(lnor, 3); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. 
*/ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache, + GPUVertBuf *coarse_data, + GPUVertBuf *subdiv_data) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_AREA, + "#define SUBDIV_POLYGON_OFFSET\n"); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(coarse_data, 1); + /* subdiv_polygon_offset is always at binding point 0 for each shader using it. */ + GPU_vertbuf_bind_as_ssbo(cache->subdiv_polygon_offset_buffer, 0); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(subdiv_data, 2); + + drw_subdiv_compute_dispatch(cache, shader, 0, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache, + GPUVertBuf *pos_nor, + GPUVertBuf *uvs, + int uvs_offset, + GPUVertBuf *stretch_angles) +{ + GPUShader *shader = get_subdiv_shader(SHADER_BUFFER_UV_STRETCH_ANGLE, nullptr); + GPU_shader_bind(shader); + + /* Inputs */ + GPU_vertbuf_bind_as_ssbo(pos_nor, 0); + GPU_vertbuf_bind_as_ssbo(uvs, 1); + + /* Outputs */ + GPU_vertbuf_bind_as_ssbo(stretch_angles, 2); + + drw_subdiv_compute_dispatch(cache, shader, uvs_offset, 0, cache->num_subdiv_quads); + + /* This generates a vertex buffer, so we need to put a barrier on the vertex attribute array. */ + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + + /* Cleanup. */ + GPU_shader_unbind(); +} + +/* -------------------------------------------------------------------- */ + +void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache, + MeshRenderData *mr, + const ToolSettings *toolsettings) +{ + Mesh *mesh = cache->mesh; + + /* Setup required data for loose geometry. 
*/ + mr->me = mesh; + mr->medge = mesh->medge; + mr->mvert = mesh->mvert; + mr->mpoly = mesh->mpoly; + mr->mloop = mesh->mloop; + mr->vert_len = mesh->totvert; + mr->edge_len = mesh->totedge; + mr->poly_len = mesh->totpoly; + mr->loop_len = mesh->totloop; + mr->extract_type = MR_EXTRACT_MESH; + + /* MeshRenderData is only used for generating edit mode data here. */ + if (!cache->bm) { + return; + } + + BMesh *bm = cache->bm; + BM_mesh_elem_table_ensure(bm, BM_EDGE | BM_FACE | BM_VERT); + + mr->bm = bm; + mr->toolsettings = toolsettings; + mr->eed_act = BM_mesh_active_edge_get(bm); + mr->efa_act = BM_mesh_active_face_get(bm, false, true); + mr->eve_act = BM_mesh_active_vert_get(bm); + mr->crease_ofs = CustomData_get_offset(&bm->edata, CD_CREASE); + mr->bweight_ofs = CustomData_get_offset(&bm->edata, CD_BWEIGHT); +#ifdef WITH_FREESTYLE + mr->freestyle_edge_ofs = CustomData_get_offset(&bm->edata, CD_FREESTYLE_EDGE); + mr->freestyle_face_ofs = CustomData_get_offset(&bm->pdata, CD_FREESTYLE_FACE); +#endif + mr->v_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); + mr->e_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); + mr->p_origindex = static_cast<int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); +} + +/** + * For material assignments we want indices for triangles that share a common material to be laid + * out contiguously in memory. To achieve this, we sort the indices based on which material the + * coarse polygon was assigned. The sort is performed by offsetting the loops indices so that they + * are directly assigned to the right sorted indices. 
+ * + * \code{.unparsed} + * Here is a visual representation, considering four quads: + * +---------+---------+---------+---------+ + * | 3 2 | 7 6 | 11 10 | 15 14 | + * | | | | | + * | 0 1 | 4 5 | 8 9 | 12 13 | + * +---------+---------+---------+---------+ + * + * If the first and third quads have the same material, we should have: + * +---------+---------+---------+---------+ + * | 3 2 | 11 10 | 7 6 | 15 14 | + * | | | | | + * | 0 1 | 8 9 | 4 5 | 12 13 | + * +---------+---------+---------+---------+ + * + * So the offsets would be: + * +---------+---------+---------+---------+ + * | 0 0 | 4 4 | -4 -4 | 0 0 | + * | | | | | + * | 0 0 | 4 4 | -4 -4 | 0 0 | + * +---------+---------+---------+---------+ + * \endcode + * + * The offsets are computed not based on the loops indices, but on the number of subdivided + * polygons for each coarse polygon. We then only store a single offset for each coarse polygon, + * since all sub-faces are contiguous, they all share the same offset. + */ +static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, + Mesh *mesh_eval, + uint mat_len) +{ + draw_subdiv_cache_free_material_data(cache); + + const int number_of_quads = cache->num_subdiv_loops / 4; + + if (mat_len == 1) { + cache->mat_start = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end")); + cache->mat_end = static_cast<int *>(MEM_callocN(sizeof(int), "subdiv mat_end")); + cache->mat_start[0] = 0; + cache->mat_end[0] = number_of_quads; + return; + } + + /* Count number of subdivided polygons for each material. */ + int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start")); + int *subdiv_polygon_offset = cache->subdiv_polygon_offset; + + // TODO: parallel_reduce? + for (int i = 0; i < mesh_eval->totpoly; i++) { + const MPoly *mpoly = &mesh_eval->mpoly[i]; + const int next_offset = (i == mesh_eval->totpoly - 1) ? 
number_of_quads : + subdiv_polygon_offset[i + 1]; + const int quad_count = next_offset - subdiv_polygon_offset[i]; + const int mat_index = mpoly->mat_nr; + mat_start[mat_index] += quad_count; + } + + /* Accumulate offsets. */ + int ofs = mat_start[0]; + mat_start[0] = 0; + for (uint i = 1; i < mat_len; i++) { + int tmp = mat_start[i]; + mat_start[i] = ofs; + ofs += tmp; + } + + /* Compute per polygon offsets. */ + int *mat_end = static_cast<int *>(MEM_dupallocN(mat_start)); + int *per_polygon_mat_offset = static_cast<int *>( + MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset")); + + for (int i = 0; i < mesh_eval->totpoly; i++) { + const MPoly *mpoly = &mesh_eval->mpoly[i]; + const int mat_index = mpoly->mat_nr; + const int single_material_index = subdiv_polygon_offset[i]; + const int material_offset = mat_end[mat_index]; + const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads : + subdiv_polygon_offset[i + 1]; + const int quad_count = next_offset - subdiv_polygon_offset[i]; + mat_end[mat_index] += quad_count; + + per_polygon_mat_offset[i] = material_offset - single_material_index; + } + + cache->polygon_mat_offset = draw_subdiv_build_origindex_buffer(per_polygon_mat_offset, + mesh_eval->totpoly); + cache->mat_start = mat_start; + cache->mat_end = mat_end; + + MEM_freeN(per_polygon_mat_offset); +} + +static bool draw_subdiv_create_requested_buffers(const Scene *scene, + Object *ob, + Mesh *mesh, + struct MeshBatchCache *batch_cache, + MeshBufferCache *mbc, + const ToolSettings *toolsettings, + OpenSubdiv_EvaluatorCache *evaluator_cache) +{ + SubsurfModifierData *smd = BKE_object_get_last_subsurf_modifier(ob); + BLI_assert(smd); + + const bool is_final_render = DRW_state_is_scene_render(); + + SubdivSettings settings; + BKE_subsurf_modifier_subdiv_settings_init(&settings, smd, is_final_render); + + if (settings.level == 0) { + return false; + } + + Mesh *mesh_eval = mesh; + BMesh *bm = nullptr; + if (mesh->edit_mesh) { + 
mesh_eval = mesh->edit_mesh->mesh_eval_final; + bm = mesh->edit_mesh->bm; + } + + BKE_subsurf_modifier_ensure_runtime(smd); + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure(smd, &settings, mesh_eval, true); + if (!subdiv) { + return false; + } + + if (!BKE_subdiv_eval_begin_from_mesh( + subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) { + return false; + } + + DRWSubdivCache *draw_cache = mesh_batch_cache_ensure_subdiv_cache(batch_cache); + if (!draw_subdiv_build_cache(draw_cache, subdiv, mesh_eval, scene, smd, is_final_render)) { + return false; + } + + const bool optimal_display = (smd->flags & eSubsurfModifierFlag_ControlEdges); + + draw_cache->bm = bm; + draw_cache->mesh = mesh_eval; + draw_cache->subdiv = subdiv; + draw_cache->optimal_display = optimal_display; + draw_cache->num_subdiv_triangles = tris_count_from_number_of_loops(draw_cache->num_subdiv_loops); + /* We can only evaluate limit normals if the patches are adaptive. */ + draw_cache->do_limit_normals = settings.is_adaptive; + + if (DRW_ibo_requested(mbc->buff.ibo.tris)) { + draw_subdiv_cache_ensure_mat_offsets(draw_cache, mesh_eval, batch_cache->mat_len); + } + + draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval); + + mesh_buffer_cache_create_requested_subdiv(batch_cache, mbc, draw_cache, toolsettings); + + return true; +} + +static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr; + +void DRW_create_subdivision(const Scene *scene, + Object *ob, + Mesh *mesh, + struct MeshBatchCache *batch_cache, + MeshBufferCache *mbc, + const ToolSettings *toolsettings) +{ + if (g_evaluator_cache == nullptr) { + g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE); + } + +#undef TIME_SUBDIV + +#ifdef TIME_SUBDIV + const double begin_time = PIL_check_seconds_timer(); +#endif + + if (!draw_subdiv_create_requested_buffers( + scene, ob, mesh, batch_cache, mbc, toolsettings, g_evaluator_cache)) { + return; + } 
+ +#ifdef TIME_SUBDIV + const double end_time = PIL_check_seconds_timer(); + fprintf(stderr, "Time to update subdivision: %f\n", end_time - begin_time); + fprintf(stderr, "Maximum FPS: %f\n", 1.0 / (end_time - begin_time)); +#endif +} + +void DRW_subdiv_free() +{ + for (int i = 0; i < NUM_SHADERS; ++i) { + GPU_shader_free(g_subdiv_shaders[i]); + } + + DRW_cache_free_old_subdiv(); + + if (g_evaluator_cache) { + openSubdiv_deleteEvaluatorCache(g_evaluator_cache); + g_evaluator_cache = nullptr; + } +} + +static LinkNode *gpu_subdiv_free_queue = nullptr; +static ThreadMutex gpu_subdiv_queue_mutex = BLI_MUTEX_INITIALIZER; + +void DRW_subdiv_cache_free(Subdiv *subdiv) +{ + BLI_mutex_lock(&gpu_subdiv_queue_mutex); + BLI_linklist_prepend(&gpu_subdiv_free_queue, subdiv); + BLI_mutex_unlock(&gpu_subdiv_queue_mutex); +} + +void DRW_cache_free_old_subdiv() +{ + if (gpu_subdiv_free_queue == nullptr) { + return; + } + + BLI_mutex_lock(&gpu_subdiv_queue_mutex); + + while (gpu_subdiv_free_queue != nullptr) { + Subdiv *subdiv = static_cast<Subdiv *>(BLI_linklist_pop(&gpu_subdiv_free_queue)); + /* Set the type to CPU so that we do actually free the cache. 
*/ + subdiv->evaluator->type = OPENSUBDIV_EVALUATOR_CPU; + BKE_subdiv_free(subdiv); + } + + BLI_mutex_unlock(&gpu_subdiv_queue_mutex); +} diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index 930fb6eabef..0bf6468f7cc 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -52,6 +52,7 @@ #include "BKE_pointcache.h" #include "BKE_pointcloud.h" #include "BKE_screen.h" +#include "BKE_subdiv_modifier.h" #include "BKE_volume.h" #include "DNA_camera_types.h" @@ -90,6 +91,7 @@ #include "draw_manager_testing.h" #include "draw_manager_text.h" #include "draw_shader.h" +#include "draw_subdivision.h" #include "draw_texture_pool.h" /* only for callbacks */ @@ -2975,6 +2977,8 @@ void DRW_engines_register(void) BKE_volume_batch_cache_dirty_tag_cb = DRW_volume_batch_cache_dirty_tag; BKE_volume_batch_cache_free_cb = DRW_volume_batch_cache_free; + + BKE_subsurf_modifier_free_gpu_cache_cb = DRW_subdiv_cache_free; } } diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h new file mode 100644 index 00000000000..f60ec7afc77 --- /dev/null +++ b/source/blender/draw/intern/draw_subdivision.h @@ -0,0 +1,231 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
 + * + * Copyright 2021, Blender Foundation. + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "BLI_sys_types.h" + +struct BMesh; +struct GPUIndexBuf; +struct GPUUniformBuf; +struct GPUVertBuf; +struct Mesh; +struct MeshBatchCache; +struct MeshBufferCache; +struct MeshRenderData; +struct Object; +struct Scene; +struct Subdiv; +struct ToolSettings; + +/* -------------------------------------------------------------------- */ +/** \name DRWPatchMap + * + * This is a GPU version of the OpenSubDiv PatchMap. The quad tree and the patch handles are copied + * to GPU buffers in order to lookup the right patch for a given set of patch coordinates. + * \{ */ + +typedef struct DRWPatchMap { + struct GPUVertBuf *patch_map_handles; + struct GPUVertBuf *patch_map_quadtree; + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; +} DRWPatchMap; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name DRWSubdivCache + * + * This holds the various buffers used to evaluate and render subdivision through OpenGL. + * \{ */ + +typedef struct DRWSubdivCache { + struct Mesh *mesh; + struct BMesh *bm; + struct Subdiv *subdiv; + bool optimal_display; + bool do_limit_normals; + + /* Coordinates used to evaluate patches for UVs, positions, and normals. */ + struct GPUVertBuf *patch_coords; + /* Coordinates used to evaluate patches for the face centers (or face dots) in edit-mode. */ + struct GPUVertBuf *fdots_patch_coords; + + /* Resolution used to generate the patch coordinates. */ + int resolution; + + /* Number of subdivided loops, also the number of patch coordinates since we have one coordinate + * per quad corner/vertex. */ + uint num_subdiv_loops; + uint num_subdiv_edges; + uint num_subdiv_triangles; + uint num_subdiv_verts; + uint num_subdiv_quads; + + /* Number of polygons in the coarse mesh, notably used to compute a coarse polygon index given a + * subdivision loop index. 
*/ + int num_coarse_poly; + + /* Maps subdivision loop to subdivided vertex index. */ + int *subdiv_loop_subdiv_vert_index; + /* Maps subdivision loop to original coarse poly index. */ + int *subdiv_loop_poly_index; + + /* Indices of faces adjacent to the vertices, ordered by vertex index, with no particular + * winding. */ + struct GPUVertBuf *subdiv_vertex_face_adjacency; + /* The difference between value (i + 1) and (i) gives the number of faces adjacent to vertex (i). + */ + struct GPUVertBuf *subdiv_vertex_face_adjacency_offsets; + + /* Maps subdivision loop to original coarse vertex index, only really useful for edit mode. */ + struct GPUVertBuf *verts_orig_index; + /* Maps subdivision loop to original coarse edge index, only really useful for edit mode. */ + struct GPUVertBuf *edges_orig_index; + + /* Owned by #Subdiv. Indexed by coarse polygon index, difference between value (i + 1) and (i) + * gives the number of ptex faces for coarse polygon (i). */ + int *face_ptex_offset; + /* Vertex buffer for face_ptex_offset. */ + struct GPUVertBuf *face_ptex_offset_buffer; + + int *subdiv_polygon_offset; + struct GPUVertBuf *subdiv_polygon_offset_buffer; + + /* Contains the start loop index and the smooth flag for each coarse polygon. */ + struct GPUVertBuf *extra_coarse_face_data; + + /* Computed for ibo.points, one value per subdivided vertex, mapping coarse vertices -> + * subdivided loop */ + int *point_indices; + + /* Material offsets. */ + int *mat_start; + int *mat_end; + struct GPUVertBuf *polygon_mat_offset; + + DRWPatchMap gpu_patch_map; + + /* UBO to store settings for the various compute shaders. */ + struct GPUUniformBuf *ubo; +} DRWSubdivCache; + +/* Only frees the data of the cache, caller is responsible to free the cache itself if necessary. 
+ */ +void draw_subdiv_cache_free(DRWSubdivCache *cache); + +/** \} */ + +void DRW_create_subdivision(const struct Scene *scene, + struct Object *ob, + struct Mesh *mesh, + struct MeshBatchCache *batch_cache, + struct MeshBufferCache *mbc, + const struct ToolSettings *toolsettings); + +void DRW_subdiv_cache_free(struct Subdiv *subdiv); + +void draw_subdiv_init_mesh_render_data(DRWSubdivCache *cache, + struct MeshRenderData *mr, + const struct ToolSettings *toolsettings); + +void draw_subdiv_init_origindex_buffer(struct GPUVertBuf *buffer, + int *vert_origindex, + uint num_loops, + uint loose_len); + +struct GPUVertBuf *draw_subdiv_build_origindex_buffer(int *vert_origindex, uint num_loops); + +/* Compute shader functions. */ + +void draw_subdiv_build_sculpt_data_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *mask_vbo, + struct GPUVertBuf *face_set_vbo, + struct GPUVertBuf *sculpt_data); + +void draw_subdiv_accumulate_normals(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *face_adjacency_offsets, + struct GPUVertBuf *face_adjacency_lists, + struct GPUVertBuf *vertex_normals); + +void draw_subdiv_finalize_normals(const DRWSubdivCache *cache, + struct GPUVertBuf *vertex_normals, + struct GPUVertBuf *subdiv_loop_subdiv_vert_index, + struct GPUVertBuf *pos_nor); + +void draw_subdiv_extract_pos_nor(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + const bool do_limit_normals); + +void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, + struct GPUVertBuf *src_data, + struct GPUVertBuf *dst_buffer, + int dimensions, + int dst_offset); + +void draw_subdiv_extract_uvs(const DRWSubdivCache *cache, + struct GPUVertBuf *uvs, + const int face_varying_channel, + const int dst_offset); + +void draw_subdiv_build_edge_fac_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *edge_idx, + struct GPUVertBuf *edge_fac); + +void draw_subdiv_build_tris_buffer(const DRWSubdivCache *cache, + 
struct GPUIndexBuf *subdiv_tris, + const int material_count); + +void draw_subdiv_build_lines_buffer(const DRWSubdivCache *cache, + struct GPUIndexBuf *lines_indices); + +void draw_subdiv_build_lines_loose_buffer(const DRWSubdivCache *cache, + struct GPUIndexBuf *lines_indices, + uint num_loose_edges); + +void draw_subdiv_build_fdots_buffers(const DRWSubdivCache *cache, + struct GPUVertBuf *fdots_pos, + struct GPUVertBuf *fdots_nor, + struct GPUIndexBuf *fdots_indices); + +void draw_subdiv_build_lnor_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *lnor); + +void draw_subdiv_build_edituv_stretch_area_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *coarse_data, + struct GPUVertBuf *subdiv_data); + +void draw_subdiv_build_edituv_stretch_angle_buffer(const DRWSubdivCache *cache, + struct GPUVertBuf *pos_nor, + struct GPUVertBuf *uvs, + int uvs_offset, + struct GPUVertBuf *stretch_angles); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.h b/source/blender/draw/intern/mesh_extractors/extract_mesh.h index 7d21804c08f..35cc2cf986e 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh.h +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.h @@ -39,6 +39,8 @@ extern "C" { #endif +struct DRWSubdivCache; + #define MIN_RANGE_LEN 1024 /* ---------------------------------------------------------------------- */ @@ -203,6 +205,11 @@ typedef void(ExtractLVertMeshFn)(const MeshRenderData *mr, const MVert *mv, const int lvert_index, void *data); +typedef void(ExtractLooseGeomSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *data); typedef void(ExtractInitFn)(const MeshRenderData *mr, struct MeshBatchCache *cache, void *buffer, @@ -213,6 +220,18 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr, void *data); typedef void(ExtractTaskReduceFn)(void *userdata, 
void *task_userdata); +typedef void(ExtractInitSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *data); +typedef void(ExtractIterSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *data); +typedef void(ExtractFinishSubdivFn)(const struct DRWSubdivCache *subdiv_cache, + void *buf, + void *data); + typedef struct MeshExtract { /** Executed on main thread and return user data for iteration functions. */ ExtractInitFn *init; @@ -225,9 +244,14 @@ typedef struct MeshExtract { ExtractLEdgeMeshFn *iter_ledge_mesh; ExtractLVertBMeshFn *iter_lvert_bm; ExtractLVertMeshFn *iter_lvert_mesh; + ExtractLooseGeomSubdivFn *iter_loose_geom_subdiv; /** Executed on one worker thread after all elements iterations. */ ExtractTaskReduceFn *task_reduce; ExtractFinishFn *finish; + /** Executed on main thread for subdivision evaluation. */ + ExtractInitSubdivFn *init_subdiv; + ExtractIterSubdivFn *iter_subdiv; + ExtractFinishSubdivFn *finish_subdiv; /** Used to request common data. 
*/ eMRDataType data_type; size_t data_size; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc index 4cc9a875f79..6a1691e8634 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ /** \name Extract Edit UV Triangles Indices @@ -94,6 +96,57 @@ static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init(&data->elb, + GPU_PRIM_TRIS, + subdiv_cache->num_subdiv_triangles, + subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_tris_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + const uint loop_idx = i * 4; + const int poly_origindex = subdiv_loop_poly_index[loop_idx]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + edituv_tri_add(data, + BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0, + BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0, + loop_idx, + loop_idx + 1, + loop_idx + 2); + + edituv_tri_add(data, + BM_elem_flag_test(efa, BM_ELEM_HIDDEN) != 0, + BM_elem_flag_test(efa, 
BM_ELEM_SELECT) != 0, + loop_idx, + loop_idx + 2, + loop_idx + 3); + } +} + +static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_tris() { MeshExtract extractor = {nullptr}; @@ -101,6 +154,9 @@ constexpr MeshExtract create_extractor_edituv_tris() extractor.iter_looptri_bm = extract_edituv_tris_iter_looptri_bm; extractor.iter_looptri_mesh = extract_edituv_tris_iter_looptri_mesh; extractor.finish = extract_edituv_tris_finish; + extractor.init_subdiv = extract_edituv_tris_init_subdiv; + extractor.iter_subdiv = extract_edituv_tris_iter_subdiv; + extractor.finish_subdiv = extract_edituv_tris_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; @@ -184,6 +240,56 @@ static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init( + &data->elb, GPU_PRIM_LINES, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_lines_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + int *subdiv_loop_edge_index = (int 
*)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + + uint start_loop_idx = i * 4; + uint end_loop_idx = (i + 1) * 4; + + const int poly_origindex = subdiv_loop_poly_index[start_loop_idx]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { + const int edge_origindex = subdiv_loop_edge_index[loop_idx]; + const bool real_edge = (edge_origindex != -1 && + mr->e_origindex[edge_origindex] != ORIGINDEX_NONE); + edituv_edge_add(data, + BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) != 0 || !real_edge, + BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) != 0, + loop_idx, + (loop_idx + 1 == end_loop_idx) ? start_loop_idx : (loop_idx + 1)); + } + } +} + +static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_lines() { MeshExtract extractor = {nullptr}; @@ -191,6 +297,9 @@ constexpr MeshExtract create_extractor_edituv_lines() extractor.iter_poly_bm = extract_edituv_lines_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_lines_iter_poly_mesh; extractor.finish = extract_edituv_lines_finish; + extractor.init_subdiv = extract_edituv_lines_init_subdiv; + extractor.iter_subdiv = extract_edituv_lines_iter_subdiv; + extractor.finish_subdiv = extract_edituv_lines_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; @@ -268,6 +377,50 @@ static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(&data->elb, ibo); } +static void extract_edituv_points_init_subdiv(const DRWSubdivCache 
*subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *tls_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(tls_data); + GPU_indexbuf_init( + &data->elb, GPU_PRIM_POINTS, subdiv_cache->num_subdiv_loops, subdiv_cache->num_subdiv_loops); + data->sync_selection = (mr->toolsettings->uv_flag & UV_SYNC_SELECTION) != 0; +} + +static void extract_edituv_points_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + const bool real_vert = (mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) && + vert_origindex != -1 && + mr->v_origindex[vert_origindex] != ORIGINDEX_NONE); + edituv_point_add(data, + (BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) || !real_vert, + BM_elem_flag_test(efa, BM_ELEM_SELECT) != 0, + i); + } +} + +static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_data) +{ + MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(&data->elb, ibo); +} + constexpr MeshExtract create_extractor_edituv_points() { MeshExtract extractor = {nullptr}; @@ -275,6 +428,9 @@ constexpr MeshExtract create_extractor_edituv_points() extractor.iter_poly_bm = extract_edituv_points_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_points_iter_poly_mesh; extractor.finish = 
extract_edituv_points_finish; + extractor.init_subdiv = extract_edituv_points_init_subdiv; + extractor.iter_subdiv = extract_edituv_points_iter_subdiv; + extractor.finish_subdiv = extract_edituv_points_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUvElem_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 54f5611106f..3d9729dea56 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -155,6 +157,33 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(elb, ibo); } +static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + GPU_indexbuf_init_build_on_device(ibo, + subdiv_cache->num_subdiv_loops * 2 + mr->edge_loose_len * 2); + + draw_subdiv_build_lines_buffer(subdiv_cache, ibo); +} + +static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + if (loose_geom->edge_len == 0) { + return; + } + + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + draw_subdiv_build_lines_loose_buffer(subdiv_cache, ibo, static_cast<uint>(loose_geom->edge_len)); +} + constexpr MeshExtract create_extractor_lines() { MeshExtract extractor = {nullptr}; @@ -163,6 +192,8 @@ constexpr MeshExtract create_extractor_lines() extractor.iter_poly_mesh = 
extract_lines_iter_poly_mesh; extractor.iter_ledge_bm = extract_lines_iter_ledge_bm; extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh; + extractor.init_subdiv = extract_lines_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_lines_loose_geom_subdiv; extractor.task_reduce = extract_lines_task_reduce; extractor.finish = extract_lines_finish; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc index e7dabfa9ee2..6855feb51ed 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc @@ -26,6 +26,7 @@ #include "MEM_guardedalloc.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -44,6 +45,18 @@ struct MeshExtract_LineAdjacency_Data { uint *vert_to_loop; }; +static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data, + uint vert_len, + uint loop_len, + uint tess_edge_len) +{ + data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * vert_len, __func__)); + + GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, loop_len); + data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len); + data->is_manifold = true; +} + static void extract_lines_adjacency_init(const MeshRenderData *mr, struct MeshBatchCache *UNUSED(cache), void *UNUSED(buf), @@ -55,11 +68,7 @@ static void extract_lines_adjacency_init(const MeshRenderData *mr, uint tess_edge_len = mr->loop_len + mr->tri_len - mr->poly_len; MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(tls_data); - data->vert_to_loop = static_cast<uint *>(MEM_callocN(sizeof(uint) * mr->vert_len, __func__)); - - GPU_indexbuf_init(&data->elb, GPU_PRIM_LINES_ADJ, tess_edge_len, mr->loop_len); - data->eh = BLI_edgehash_new_ex(__func__, tess_edge_len); - 
data->is_manifold = true; + line_adjacency_data_init(data, mr->vert_len, mr->loop_len, tess_edge_len); } BLI_INLINE void lines_adjacency_triangle( @@ -171,6 +180,56 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), MEM_freeN(data->vert_to_loop); } +static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *UNUSED(cache), + void *UNUSED(buf), + void *_data) +{ + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + + /* For each polygon there is (loop + triangle - 1) edges. Since we only have quads, and a quad + * is split into 2 triangles, we have (loop + 2 - 1) = (loop + 1) edges for each quad, or in + * total: (number_of_loops + number_of_quads). */ + const uint tess_len = subdiv_cache->num_subdiv_loops + subdiv_cache->num_subdiv_quads; + line_adjacency_data_init( + data, tess_len, subdiv_cache->num_subdiv_verts, subdiv_cache->num_subdiv_loops); +} + +static void extract_lines_adjacency_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + void *_data) +{ + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + + for (uint i = 0; i < subdiv_cache->num_subdiv_quads; i++) { + const uint loop_index = i * 4; + const uint l0 = loop_index + 0; + const uint l1 = loop_index + 1; + const uint l2 = loop_index + 2; + const uint l3 = loop_index + 3; + + const uint v0 = subdiv_cache->subdiv_loop_subdiv_vert_index[l0]; + const uint v1 = subdiv_cache->subdiv_loop_subdiv_vert_index[l1]; + const uint v2 = subdiv_cache->subdiv_loop_subdiv_vert_index[l2]; + const uint v3 = subdiv_cache->subdiv_loop_subdiv_vert_index[l3]; + + lines_adjacency_triangle(v0, v1, v2, l0, l1, l2, data); + lines_adjacency_triangle(v0, v2, v3, l0, l2, l3, data); + } +} + +static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + 
void *_data) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); + GPU_indexbuf_build_in_place(&data->elb, ibo); + BLI_edgehash_free(data->eh, nullptr); + MEM_freeN(data->vert_to_loop); +} + #undef NO_EDGE constexpr MeshExtract create_extractor_lines_adjacency() @@ -180,6 +239,9 @@ constexpr MeshExtract create_extractor_lines_adjacency() extractor.iter_looptri_bm = extract_lines_adjacency_iter_looptri_bm; extractor.iter_looptri_mesh = extract_lines_adjacency_iter_looptri_mesh; extractor.finish = extract_lines_adjacency_finish; + extractor.init_subdiv = extract_lines_adjacency_init_subdiv; + extractor.iter_subdiv = extract_lines_adjacency_iter_subdiv; + extractor.finish_subdiv = extract_lines_adjacency_finish_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_LineAdjacency_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index 01e14a004ed..19167772a42 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -25,6 +25,7 @@ #include "MEM_guardedalloc.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -155,6 +156,74 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr), GPU_indexbuf_build_in_place(elb, ibo); } +static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *UNUSED(cache), + void *UNUSED(buffer), + void *data) +{ + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); + /* Copy the points as the data upload will free them. 
*/ + elb->data = (uint *)MEM_dupallocN(subdiv_cache->point_indices); + elb->index_len = subdiv_cache->num_subdiv_verts; + elb->index_min = 0; + elb->index_max = subdiv_cache->num_subdiv_loops - 1; + elb->prim_type = GPU_PRIM_POINTS; +} + +static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *UNUSED(buffer), + void *data) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); + + elb->data = static_cast<uint32_t *>( + MEM_reallocN(elb->data, sizeof(uint) * (subdiv_cache->num_subdiv_loops + loop_loose_len))); + + const Mesh *coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + if (elb->data[loose_edge->v1] == -1u) { + elb->data[loose_edge->v1] = offset; + } + if (elb->data[loose_edge->v2] == -1u) { + elb->data[loose_edge->v2] = offset + 1; + } + elb->index_max += 2; + elb->index_len += 2; + offset += 2; + } + + for (int i = 0; i < loose_geom->vert_len; i++) { + if (elb->data[loose_geom->verts[i]] == -1u) { + elb->data[loose_geom->verts[i]] = offset; + } + elb->index_max += 1; + elb->index_len += 1; + offset += 1; + } +} + +static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), + void *buf, + void *_userdata) +{ + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata); + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf); + GPU_indexbuf_build_in_place(elb, ibo); +} + constexpr MeshExtract create_extractor_points() { MeshExtract extractor = {nullptr}; @@ -167,6 +236,9 @@ constexpr MeshExtract create_extractor_points() extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh; 
extractor.task_reduce = extract_points_task_reduce; extractor.finish = extract_points_finish; + extractor.init_subdiv = extract_points_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_points_loose_geom_subdiv; + extractor.finish_subdiv = extract_points_finish_subdiv; extractor.use_threading = true; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(GPUIndexBufBuilder); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 54e733d3d86..b1ace8bc6c9 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from) @@ -123,10 +125,37 @@ static void extract_tris_finish(const MeshRenderData *mr, } } +static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buffer); + /* Initialize the index buffer, it was already allocated, it will be filled on the device. */ + GPU_indexbuf_init_build_on_device(ibo, subdiv_cache->num_subdiv_triangles * 3); + + if (cache->tris_per_mat) { + for (int i = 0; i < cache->mat_len; i++) { + if (cache->tris_per_mat[i] == nullptr) { + cache->tris_per_mat[i] = GPU_indexbuf_calloc(); + } + + /* Multiply by 6 since we have 2 triangles per quad. 
*/ + const int start = subdiv_cache->mat_start[i] * 6; + const int len = (subdiv_cache->mat_end[i] - subdiv_cache->mat_start[i]) * 6; + GPU_indexbuf_create_subrange_in_place(cache->tris_per_mat[i], ibo, start, len); + } + } + + draw_subdiv_build_tris_buffer(subdiv_cache, ibo, cache->mat_len); +} + constexpr MeshExtract create_extractor_tris() { MeshExtract extractor = {nullptr}; extractor.init = extract_tris_init; + extractor.init_subdiv = extract_tris_init_subdiv; extractor.iter_poly_bm = extract_tris_iter_poly_bm; extractor.iter_poly_mesh = extract_tris_iter_poly_mesh; extractor.task_reduce = extract_tris_mat_task_reduce; @@ -214,6 +243,7 @@ constexpr MeshExtract create_extractor_tris_single_mat() { MeshExtract extractor = {nullptr}; extractor.init = extract_tris_single_mat_init; + extractor.init_subdiv = extract_tris_init_subdiv; extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm; extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh; extractor.task_reduce = extract_tris_mat_task_reduce; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc index 8a5a8134ca7..ea702e5efdd 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc @@ -32,6 +32,7 @@ #include "BKE_attribute.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -153,7 +154,9 @@ static GPUVertCompType get_comp_type_for_type(CustomDataType type) static void init_vbo_for_attribute(const MeshRenderData *mr, GPUVertBuf *vbo, - const DRW_AttributeRequest &request) + const DRW_AttributeRequest &request, + bool build_on_device, + uint32_t len) { GPUVertCompType comp_type = get_comp_type_for_type(request.cd_type); GPUVertFetchMode fetch_mode = get_fetch_mode_for_type(request.cd_type); @@ -184,8 +187,13 @@ static void 
init_vbo_for_attribute(const MeshRenderData *mr, } } - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, static_cast<uint32_t>(mr->loop_len)); + if (build_on_device) { + GPU_vertbuf_init_build_on_device(vbo, &format, len); + } + else { + GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_data_alloc(vbo, len); + } } template<typename AttributeType, typename VBOType> @@ -309,7 +317,7 @@ static void extract_attr_init(const MeshRenderData *mr, GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - init_vbo_for_attribute(mr, vbo, request); + init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); /* TODO(kevindietrich) : float3 is used for scalar attributes as the implicit conversion done by * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following the @@ -346,6 +354,68 @@ static void extract_attr_init(const MeshRenderData *mr, } } +static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *cache, + void *buffer, + void *UNUSED(tls_data), + int index) +{ + const DRW_MeshAttributes *attrs_used = &cache->attr_used; + const DRW_AttributeRequest &request = attrs_used->requests[index]; + + Mesh *coarse_mesh = subdiv_cache->mesh; + + const uint32_t dimensions = gpu_component_size_for_attribute_type(request.cd_type); + + /* Prepare VBO for coarse data. The compute shader only expects floats. 
*/ + GPUVertBuf *src_data = GPU_vertbuf_calloc(); + static GPUVertFormat coarse_format = {0}; + GPU_vertformat_attr_add(&coarse_format, "data", GPU_COMP_F32, dimensions, GPU_FETCH_FLOAT); + GPU_vertbuf_init_with_format_ex(src_data, &coarse_format, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(src_data, static_cast<uint32_t>(coarse_mesh->totloop)); + + switch (request.cd_type) { + case CD_PROP_BOOL: { + extract_attr_generic<bool, float3>(mr, src_data, request); + break; + } + case CD_PROP_INT32: { + extract_attr_generic<int32_t, float3>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT: { + extract_attr_generic<float, float3>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT2: { + extract_attr_generic<float2>(mr, src_data, request); + break; + } + case CD_PROP_FLOAT3: { + extract_attr_generic<float3>(mr, src_data, request); + break; + } + case CD_PROP_COLOR: { + extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request); + break; + } + default: { + BLI_assert(false); + } + } + + GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); + init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); + + /* Ensure data is uploaded properly. */ + GPU_vertbuf_tag_dirty(src_data); + draw_subdiv_interp_custom_data( + subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0); + + GPU_vertbuf_discard(src_data); +} + /* Wrappers around extract_attr_init so we can pass the index of the attribute that we want to * extract. The overall API does not allow us to pass this in a convenient way. 
*/ #define EXTRACT_INIT_WRAPPER(index) \ @@ -353,6 +423,14 @@ static void extract_attr_init(const MeshRenderData *mr, const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \ { \ extract_attr_init(mr, cache, buf, tls_data, index); \ + } \ + static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \ + const MeshRenderData *mr, \ + struct MeshBatchCache *cache, \ + void *buf, \ + void *tls_data) \ + { \ + extract_attr_init_subdiv(subdiv_cache, mr, cache, buf, tls_data, index); \ } EXTRACT_INIT_WRAPPER(0) @@ -371,10 +449,12 @@ EXTRACT_INIT_WRAPPER(12) EXTRACT_INIT_WRAPPER(13) EXTRACT_INIT_WRAPPER(14) -template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn) +template<int index> +constexpr MeshExtract create_extractor_attr(ExtractInitFn fn, ExtractInitSubdivFn subdiv_fn) { MeshExtract extractor = {nullptr}; extractor.init = fn; + extractor.init_subdiv = subdiv_fn; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; @@ -388,7 +468,8 @@ template<int index> constexpr MeshExtract create_extractor_attr(ExtractInitFn fn extern "C" { #define CREATE_EXTRACTOR_ATTR(index) \ - blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index) + blender::draw::create_extractor_attr<index>(blender::draw::extract_attr_init##index, \ + blender::draw::extract_attr_init_subdiv##index) const MeshExtract extract_attr[GPU_MAX_ATTR] = { CREATE_EXTRACTOR_ATTR(0), diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index 2e2444a8e3d..5ee34d7fdb2 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -25,6 +25,7 @@ #include "GPU_capabilities.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -216,6 +217,86 
@@ static void extract_edge_fac_finish(const MeshRenderData *mr, MEM_SAFE_FREE(data->edge_loop_count); } +/* Different function than the one used for the non-subdivision case, as we directly take care of + * the buggy AMD driver case. */ +static GPUVertFormat *get_subdiv_edge_fac_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + if (GPU_crappy_amd_driver()) { + GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + else { + GPU_vertformat_attr_add(&format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + } + } + return &format; +} + +static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *edge_idx = cache->final.buff.vbo.edge_idx; + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPU_vertbuf_init_build_on_device( + vbo, get_subdiv_edge_fac_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + + /* Create a temporary buffer for the edge original indices if it was not requested. 
*/ + const bool has_edge_idx = edge_idx != nullptr; + GPUVertBuf *loop_edge_idx = nullptr; + if (has_edge_idx) { + loop_edge_idx = edge_idx; + } + else { + loop_edge_idx = GPU_vertbuf_calloc(); + draw_subdiv_init_origindex_buffer( + loop_edge_idx, + static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)), + subdiv_cache->num_subdiv_loops, + 0); + } + + draw_subdiv_build_edge_fac_buffer(subdiv_cache, pos_nor, loop_edge_idx, vbo); + + if (!has_edge_idx) { + GPU_vertbuf_discard(loop_edge_idx); + } +} + +static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + if (loose_geom->edge_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + /* Make sure buffer is active for sending loose data. */ + GPU_vertbuf_use(vbo); + + uint offset = subdiv_cache->num_subdiv_loops; + for (int i = 0; i < loose_geom->edge_len; i++) { + if (GPU_crappy_amd_driver()) { + float loose_edge_fac[2] = {1.0f, 1.0f}; + GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); + } + else { + char loose_edge_fac[2] = {255, 255}; + GPU_vertbuf_update_sub(vbo, offset * sizeof(char), sizeof(loose_edge_fac), loose_edge_fac); + } + + offset += 2; + } +} + constexpr MeshExtract create_extractor_edge_fac() { MeshExtract extractor = {nullptr}; @@ -224,6 +305,8 @@ constexpr MeshExtract create_extractor_edge_fac() extractor.iter_poly_mesh = extract_edge_fac_iter_poly_mesh; extractor.iter_ledge_bm = extract_edge_fac_iter_ledge_bm; extractor.iter_ledge_mesh = extract_edge_fac_iter_ledge_mesh; + extractor.init_subdiv = extract_edge_fac_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_edge_fac_loose_geom_subdiv; extractor.finish = extract_edge_fac_finish; extractor.data_type = MR_DATA_POLY_NOR; extractor.data_size = sizeof(MeshExtract_EdgeFac_Data); diff --git 
a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc index 5232346e51e..eef64085c95 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc @@ -25,6 +25,8 @@ #include "draw_cache_impl.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -107,19 +109,25 @@ static void mesh_render_data_vert_flag(const MeshRenderData *mr, } } -static void extract_edit_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) +static GPUVertFormat *get_edit_data_format(void) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { /* WARNING: Adjust #EditLoopData struct accordingly. */ GPU_vertformat_attr_add(&format, "data", GPU_COMP_U8, 4, GPU_FETCH_INT); GPU_vertformat_alias_add(&format, "flag"); } - GPU_vertbuf_init_with_format(vbo, &format); + return &format; +} + +static void extract_edit_data_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat *format = get_edit_data_format(); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->loop_len + mr->loop_loose_len); EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); *(EditLoopData **)tls_data = vbo_data; @@ -240,6 +248,80 @@ static void extract_edit_data_iter_lvert_mesh(const MeshRenderData *mr, } } +static void extract_edit_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPU_vertbuf_init_with_format(vbo, get_edit_data_format()); + 
GPU_vertbuf_data_alloc(vbo, subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + EditLoopData *vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); + *(EditLoopData **)data = vbo_data; +} + +static void extract_edit_data_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + EditLoopData *vbo_data = *(EditLoopData **)_data; + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int edge_origindex = subdiv_loop_edge_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + + EditLoopData *edit_loop_data = &vbo_data[i]; + memset(edit_loop_data, 0, sizeof(EditLoopData)); + + if (vert_origindex != -1) { + const BMVert *eve = bm_original_vert_get(mr, vert_origindex); + if (eve) { + mesh_render_data_vert_flag(mr, eve, edit_loop_data); + } + } + + if (edge_origindex != -1) { + const BMEdge *eed = bm_original_edge_get(mr, edge_origindex); + if (eed) { + mesh_render_data_edge_flag(mr, eed, edit_loop_data); + } + } + + BMFace *efa = bm_original_face_get(mr, poly_origindex); + /* The -1 parameter is for edit_uvs, which we don't do here. 
*/ + mesh_render_data_face_flag(mr, efa, -1, edit_loop_data); + } +} + +static void extract_edit_data_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + const MeshExtractLooseGeom *loose_geom, + void *UNUSED(buffer), + void *_data) +{ + if (loose_geom->edge_len == 0) { + return; + } + + EditLoopData *vbo_data = *(EditLoopData **)_data; + + for (int ledge_index = 0; ledge_index < loose_geom->edge_len; ledge_index++) { + const int offset = subdiv_cache->num_subdiv_loops + ledge_index * 2; + EditLoopData *data = &vbo_data[offset]; + memset(data, 0, sizeof(EditLoopData)); + BMEdge *eed = bm_original_edge_get(mr, loose_geom->edges[ledge_index]); + mesh_render_data_edge_flag(mr, eed, &data[0]); + data[1] = data[0]; + mesh_render_data_vert_flag(mr, eed->v1, &data[0]); + mesh_render_data_vert_flag(mr, eed->v2, &data[1]); + } +} + constexpr MeshExtract create_extractor_edit_data() { MeshExtract extractor = {nullptr}; @@ -250,6 +332,9 @@ constexpr MeshExtract create_extractor_edit_data() extractor.iter_ledge_mesh = extract_edit_data_iter_ledge_mesh; extractor.iter_lvert_bm = extract_edit_data_iter_lvert_bm; extractor.iter_lvert_mesh = extract_edit_data_iter_lvert_mesh; + extractor.init_subdiv = extract_edit_data_init_subdiv; + extractor.iter_subdiv = extract_edit_data_iter_subdiv; + extractor.iter_loose_geom_subdiv = extract_edit_data_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(EditLoopData *); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc index b8494428eed..067d482bc2b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc @@ -25,6 +25,8 @@ #include "draw_cache_impl.h" +#include "draw_subdivision.h" + namespace blender::draw { /* 
---------------------------------------------------------------------- */ @@ -36,12 +38,11 @@ struct MeshExtract_EditUVData_Data { int cd_ofs; }; -static void extract_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) +static void extract_edituv_data_init_common(const MeshRenderData *mr, + GPUVertBuf *vbo, + MeshExtract_EditUVData_Data *data, + uint loop_len) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { /* WARNING: Adjust #EditLoopData struct accordingly. */ @@ -50,15 +51,23 @@ static void extract_edituv_data_init(const MeshRenderData *mr, } GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, mr->loop_len); + GPU_vertbuf_data_alloc(vbo, loop_len); CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; - - MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data); data->vbo_data = (EditLoopData *)GPU_vertbuf_get_data(vbo); data->cd_ofs = CustomData_get_offset(cd_ldata, CD_MLOOPUV); } +static void extract_edituv_data_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(tls_data); + extract_edituv_data_init_common(mr, vbo, data, mr->loop_len); +} + static void extract_edituv_data_iter_poly_bm(const MeshRenderData *mr, const BMFace *f, const int UNUSED(f_index), @@ -119,12 +128,54 @@ static void extract_edituv_data_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_edituv_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data 
*>(tls_data); + extract_edituv_data_init_common(mr, vbo, data, subdiv_cache->num_subdiv_loops); +} + +static void extract_edituv_data_iter_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + void *_data) +{ + MeshExtract_EditUVData_Data *data = static_cast<MeshExtract_EditUVData_Data *>(_data); + int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int vert_origindex = subdiv_loop_vert_index[i]; + const int edge_origindex = subdiv_loop_edge_index[i]; + const int poly_origindex = subdiv_loop_poly_index[i]; + + EditLoopData *edit_loop_data = &data->vbo_data[i]; + memset(edit_loop_data, 0, sizeof(EditLoopData)); + + BMFace *efa = bm_original_face_get(mr, poly_origindex); + + if (vert_origindex != -1 && edge_origindex != -1) { + BMEdge *eed = bm_original_edge_get(mr, edge_origindex); + /* Loop on an edge endpoint. 
*/ + BMLoop *l = BM_face_edge_share_loop(efa, eed); + mesh_render_data_loop_flag(mr, l, data->cd_ofs, edit_loop_data); + mesh_render_data_loop_edge_flag(mr, l, data->cd_ofs, edit_loop_data); + } + } +} + constexpr MeshExtract create_extractor_edituv_data() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_data_init; extractor.iter_poly_bm = extract_edituv_data_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_data_iter_poly_mesh; + extractor.init_subdiv = extract_edituv_data_init_subdiv; + extractor.iter_subdiv = extract_edituv_data_iter_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_EditUVData_Data); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc index a947d98f955..0ea4ef5d5db 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -213,12 +215,69 @@ static void extract_edituv_stretch_angle_iter_poly_mesh(const MeshRenderData *mr } } +static GPUVertFormat *get_edituv_stretch_angle_format_subdiv() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + /* Warning: adjust #UVStretchAngle struct accordingly. 
*/ + GPU_vertformat_attr_add(&format, "angle", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(&format, "uv_angles", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + } + return &format; +} + +static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(tls_data)) +{ + GPUVertBuf *refined_vbo = static_cast<GPUVertBuf *>(buffer); + + GPU_vertbuf_init_build_on_device( + refined_vbo, get_edituv_stretch_angle_format_subdiv(), subdiv_cache->num_subdiv_loops); + + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + GPUVertBuf *uvs = cache->final.buff.vbo.uv; + + /* UVs are stored contiguously so we need to compute the offset in the UVs buffer for the active + * UV layer. */ + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_MESH) ? &mr->me->ldata : &mr->bm->ldata; + + uint32_t uv_layers = cache->cd_used.uv; + /* HACK to fix T68857 */ + if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { + int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); + if (layer != -1) { + uv_layers |= (1 << layer); + } + } + + int uvs_offset = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (uv_layers & (1 << i)) { + if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) { + break; + } + + uvs_offset += 1; + } + } + + /* The data is at `offset * num loops`, and we have 2 values per index. 
*/ + uvs_offset *= subdiv_cache->num_subdiv_loops * 2; + + draw_subdiv_build_edituv_stretch_angle_buffer( + subdiv_cache, pos_nor, uvs, uvs_offset, refined_vbo); +} + constexpr MeshExtract create_extractor_edituv_edituv_stretch_angle() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_stretch_angle_init; extractor.iter_poly_bm = extract_edituv_stretch_angle_iter_poly_bm; extractor.iter_poly_mesh = extract_edituv_stretch_angle_iter_poly_mesh; + extractor.init_subdiv = extract_edituv_stretch_angle_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_StretchAngle_Data); extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc index 3db8cd79af5..3b40b3115f5 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc @@ -27,6 +27,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -63,14 +65,12 @@ BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_t return (ratio > 1.0f) ? 
(1.0f / ratio) : ratio; } -static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(data)) +static void compute_area_ratio(const MeshRenderData *mr, + float *r_area_ratio, + float &r_tot_area, + float &r_tot_uv_area) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); float tot_area = 0.0f, tot_uv_area = 0.0f; - float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); if (mr->extract_type == MR_EXTRACT_BMESH) { CustomData *cd_ldata = &mr->bm->ldata; @@ -84,7 +84,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float uvarea = BM_face_calc_area_uv(efa, uv_ofs); tot_area += area; tot_uv_area += uvarea; - area_ratio[f] = area_ratio_get(area, uvarea); + r_area_ratio[f] = area_ratio_get(area, uvarea); } } else { @@ -96,12 +96,22 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float uvarea = BKE_mesh_calc_poly_uv_area(mp, uv_data); tot_area += area; tot_uv_area += uvarea; - area_ratio[mp_index] = area_ratio_get(area, uvarea); + r_area_ratio[mp_index] = area_ratio_get(area, uvarea); } } - cache->tot_area = tot_area; - cache->tot_uv_area = tot_uv_area; + r_tot_area = tot_area; + r_tot_uv_area = tot_uv_area; +} + +static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); + compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area); /* Convert in place to avoid an extra allocation */ uint16_t *poly_stretch = (uint16_t *)area_ratio; @@ -135,11 +145,46 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, MEM_freeN(area_ratio); } +static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, 
+ struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + + /* Initialize final buffer. */ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + + GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops); + + /* Initialize coarse data buffer. */ + + GPUVertBuf *coarse_data = GPU_vertbuf_calloc(); + + /* We use the same format as we just copy data around. */ + GPU_vertbuf_init_with_format(coarse_data, &format); + GPU_vertbuf_data_alloc(coarse_data, mr->loop_len); + + compute_area_ratio(mr, + static_cast<float *>(GPU_vertbuf_get_data(coarse_data)), + cache->tot_area, + cache->tot_uv_area); + + draw_subdiv_build_edituv_stretch_area_buffer(subdiv_cache, coarse_data, vbo); + + GPU_vertbuf_discard(coarse_data); +} + constexpr MeshExtract create_extractor_edituv_stretch_area() { MeshExtract extractor = {nullptr}; extractor.init = extract_edituv_stretch_area_init; extractor.finish = extract_edituv_stretch_area_finish; + extractor.init_subdiv = extract_edituv_stretch_area_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc index 33f9180e122..f65159f9b95 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc @@ -23,24 +23,40 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ /** \name Extract Face-dots positions * \{ */ -static void extract_fdots_pos_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), - void *buf, - void *tls_data) 
+static GPUVertFormat *get_fdots_pos_format() { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); } + return &format; +} + +static GPUVertFormat *get_fdots_nor_format_subdiv() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "norAndFlag", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + } + return &format; +} - GPU_vertbuf_init_with_format(vbo, &format); +static void extract_fdots_pos_init(const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buf, + void *tls_data) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat *format = get_fdots_pos_format(); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->poly_len); void *vbo_data = GPU_vertbuf_get_data(vbo); *(float(**)[3])tls_data = static_cast<float(*)[3]>(vbo_data); @@ -97,10 +113,30 @@ static void extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + /* We "extract" positions, normals, and indices at once. 
*/ + GPUVertBuf *fdots_pos_vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertBuf *fdots_nor_vbo = cache->final.buff.vbo.fdots_nor; + GPUIndexBuf *fdots_pos_ibo = cache->final.buff.ibo.fdots; + + GPU_vertbuf_init_build_on_device( + fdots_nor_vbo, get_fdots_nor_format_subdiv(), subdiv_cache->num_coarse_poly); + GPU_vertbuf_init_build_on_device( + fdots_pos_vbo, get_fdots_pos_format(), subdiv_cache->num_coarse_poly); + GPU_indexbuf_init_build_on_device(fdots_pos_ibo, subdiv_cache->num_coarse_poly); + draw_subdiv_build_fdots_buffers(subdiv_cache, fdots_pos_vbo, fdots_nor_vbo, fdots_pos_ibo); +} + constexpr MeshExtract create_extractor_fdots_pos() { MeshExtract extractor = {nullptr}; extractor.init = extract_fdots_pos_init; + extractor.init_subdiv = extract_fdots_init_subdiv; extractor.iter_poly_bm = extract_fdots_pos_iter_poly_bm; extractor.iter_poly_mesh = extract_fdots_pos_iter_poly_mesh; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc index 3c3ac7a7a0a..d30c38ef050 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc @@ -23,6 +23,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -107,10 +109,34 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, } } +static GPUVertFormat *get_subdiv_lnor_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "lnor"); + } + return &format; +} + +static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + 
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertBuf *pos_nor = cache->final.buff.vbo.pos_nor; + BLI_assert(pos_nor); + GPU_vertbuf_init_build_on_device(vbo, get_subdiv_lnor_format(), subdiv_cache->num_subdiv_loops); + draw_subdiv_build_lnor_buffer(subdiv_cache, pos_nor, vbo); +} + constexpr MeshExtract create_extractor_lnor() { MeshExtract extractor = {nullptr}; extractor.init = extract_lnor_init; + extractor.init_subdiv = extract_lnor_init_subdiv; extractor.iter_poly_bm = extract_lnor_iter_poly_bm; extractor.iter_poly_mesh = extract_lnor_iter_poly_mesh; extractor.data_type = MR_DATA_LOOP_NOR; @@ -210,6 +236,7 @@ constexpr MeshExtract create_extractor_lnor_hq() { MeshExtract extractor = {nullptr}; extractor.init = extract_lnor_hq_init; + extractor.init_subdiv = extract_lnor_init_subdiv; extractor.iter_poly_bm = extract_lnor_hq_iter_poly_bm; extractor.iter_poly_mesh = extract_lnor_hq_iter_poly_mesh; extractor.data_type = MR_DATA_LOOP_NOR; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc index eb9a138590c..00ed4ca6359 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc @@ -25,6 +25,8 @@ #include "extract_mesh.h" +#include "draw_subdivision.h" + namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -194,6 +196,123 @@ static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr), MEM_freeN(data->normals); } +static GPUVertFormat *get_pos_nor_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "vnor"); + } + return &format; +} + +static GPUVertFormat *get_normals_format() 
+{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "nor", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "lnor"); + } + return &format; +} + +static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + const bool do_limit_normals = subdiv_cache->do_limit_normals; + + /* Initialize the vertex buffer, it was already allocated. */ + GPU_vertbuf_init_build_on_device( + vbo, get_pos_nor_format(), subdiv_cache->num_subdiv_loops + mr->loop_loose_len); + + draw_subdiv_extract_pos_nor(subdiv_cache, vbo, do_limit_normals); + + if (!do_limit_normals) { + /* We cannot evaluate vertex normals using the limit surface, so compute them manually. */ + GPUVertBuf *subdiv_loop_subdiv_vert_index = draw_subdiv_build_origindex_buffer( + subdiv_cache->subdiv_loop_subdiv_vert_index, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *vertex_normals = GPU_vertbuf_calloc(); + GPU_vertbuf_init_build_on_device( + vertex_normals, get_normals_format(), subdiv_cache->num_subdiv_verts); + + draw_subdiv_accumulate_normals(subdiv_cache, + vbo, + subdiv_cache->subdiv_vertex_face_adjacency_offsets, + subdiv_cache->subdiv_vertex_face_adjacency, + vertex_normals); + + draw_subdiv_finalize_normals(subdiv_cache, vertex_normals, subdiv_loop_subdiv_vert_index, vbo); + + GPU_vertbuf_discard(vertex_normals); + GPU_vertbuf_discard(subdiv_loop_subdiv_vert_index); + } +} + +static void extract_pos_nor_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + const Mesh 
*coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + const MVert *coarse_verts = coarse_mesh->mvert; + uint offset = subdiv_cache->num_subdiv_loops; + + /* TODO(kevindietrich) : replace this when compressed normals are supported. */ + struct SubdivPosNorLoop { + float pos[3]; + float nor[3]; + float flag; + }; + + SubdivPosNorLoop edge_data[2]; + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + const MVert *loose_vert1 = &coarse_verts[loose_edge->v1]; + const MVert *loose_vert2 = &coarse_verts[loose_edge->v2]; + + copy_v3_v3(edge_data[0].pos, loose_vert1->co); + normal_short_to_float_v3(edge_data[0].nor, loose_vert1->no); + edge_data[0].flag = 0.0f; + + copy_v3_v3(edge_data[1].pos, loose_vert2->co); + normal_short_to_float_v3(edge_data[1].nor, loose_vert2->no); + edge_data[1].flag = 0.0f; + + GPU_vertbuf_update_sub( + vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop) * 2, &edge_data); + + offset += 2; + } + + SubdivPosNorLoop vert_data; + vert_data.flag = 0.0f; + for (int i = 0; i < loose_geom->vert_len; i++) { + const MVert *loose_vertex = &coarse_verts[loose_geom->verts[i]]; + + copy_v3_v3(vert_data.pos, loose_vertex->co); + normal_short_to_float_v3(vert_data.nor, loose_vertex->no); + + GPU_vertbuf_update_sub( + vbo, offset * sizeof(SubdivPosNorLoop), sizeof(SubdivPosNorLoop), &vert_data); + + offset += 1; + } +} + constexpr MeshExtract create_extractor_pos_nor() { MeshExtract extractor = {nullptr}; @@ -205,6 +324,8 @@ constexpr MeshExtract create_extractor_pos_nor() extractor.iter_lvert_bm = extract_pos_nor_iter_lvert_bm; extractor.iter_lvert_mesh = extract_pos_nor_iter_lvert_mesh; extractor.finish = extract_pos_nor_finish; + extractor.init_subdiv = extract_pos_nor_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_pos_nor_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(MeshExtract_PosNor_Data); 
extractor.use_threading = true; @@ -391,6 +512,7 @@ constexpr MeshExtract create_extractor_pos_nor_hq() { MeshExtract extractor = {nullptr}; extractor.init = extract_pos_nor_hq_init; + extractor.init_subdiv = extract_pos_nor_init_subdiv; extractor.iter_poly_bm = extract_pos_nor_hq_iter_poly_bm; extractor.iter_poly_mesh = extract_pos_nor_hq_iter_poly_mesh; extractor.iter_ledge_bm = extract_pos_nor_hq_iter_ledge_bm; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc index fd91bc5258f..753fbe7e0e2 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc @@ -27,6 +27,7 @@ #include "BKE_paint.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -35,13 +36,23 @@ namespace blender::draw { /** \name Extract Sculpt Data * \{ */ +static GPUVertFormat *get_sculpt_data_format() +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + return &format; +} + static void extract_sculpt_data_init(const MeshRenderData *mr, struct MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; + GPUVertFormat *format = get_sculpt_data_format(); CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? 
&mr->bm->vdata : &mr->me->vdata; @@ -50,12 +61,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS); - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "fset", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - GPU_vertformat_attr_add(&format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - } - - GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_init_with_format(vbo, format); GPU_vertbuf_data_alloc(vbo, mr->loop_len); struct gpuSculptData { @@ -121,10 +127,99 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, } } +static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + struct MeshBatchCache *UNUSED(cache), + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + Mesh *coarse_mesh = mr->me; + CustomData *cd_vdata = &coarse_mesh->vdata; + CustomData *cd_pdata = &coarse_mesh->pdata; + + /* First, interpolate mask if available. 
*/ + GPUVertBuf *mask_vbo = nullptr; + GPUVertBuf *subdiv_mask_vbo = nullptr; + float *cd_mask = (float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); + + if (cd_mask) { + GPUVertFormat mask_format = {0}; + GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + + mask_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(mask_vbo, &mask_format); + GPU_vertbuf_data_alloc(mask_vbo, coarse_mesh->totloop); + float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo)); + + for (int i = 0; i < coarse_mesh->totpoly; i++) { + const MPoly *mpoly = &coarse_mesh->mpoly[i]; + + for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; + loop_index++) { + const MLoop *ml = &coarse_mesh->mloop[loop_index]; + *v_mask++ = cd_mask[ml->v]; + } + } + + subdiv_mask_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_build_on_device( + subdiv_mask_vbo, &mask_format, subdiv_cache->num_subdiv_loops); + + draw_subdiv_interp_custom_data(subdiv_cache, mask_vbo, subdiv_mask_vbo, 1, 0); + } + + /* Then, gather face sets. 
*/ + GPUVertFormat face_set_format = {0}; + GPU_vertformat_attr_add(&face_set_format, "msk", GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + + GPUVertBuf *face_set_vbo = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(face_set_vbo, &face_set_format); + GPU_vertbuf_data_alloc(face_set_vbo, subdiv_cache->num_subdiv_loops); + + struct gpuFaceSet { + uint8_t color[4]; + }; + + gpuFaceSet *face_sets = (gpuFaceSet *)GPU_vertbuf_get_data(face_set_vbo); + int *cd_face_set = (int *)CustomData_get_layer(cd_pdata, CD_SCULPT_FACE_SETS); + + GPUVertFormat *format = get_sculpt_data_format(); + GPU_vertbuf_init_build_on_device(vbo, format, subdiv_cache->num_subdiv_loops); + int *subdiv_loop_poly_index = subdiv_cache->subdiv_loop_poly_index; + + for (uint i = 0; i < subdiv_cache->num_subdiv_loops; i++) { + const int mp_index = subdiv_loop_poly_index[i]; + + uchar face_set_color[4] = {UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX}; + if (cd_face_set) { + const int face_set_id = cd_face_set[mp_index]; + /* Skip for the default color Face Set to render it white. */ + if (face_set_id != coarse_mesh->face_sets_color_default) { + BKE_paint_face_set_overlay_color_get( + face_set_id, coarse_mesh->face_sets_color_seed, face_set_color); + } + } + copy_v3_v3_uchar(face_sets->color, face_set_color); + face_sets++; + } + + /* Finally, interleave mask and face sets. 
*/ + draw_subdiv_build_sculpt_data_buffer(subdiv_cache, subdiv_mask_vbo, face_set_vbo, vbo); + + if (mask_vbo) { + GPU_vertbuf_discard(mask_vbo); + GPU_vertbuf_discard(subdiv_mask_vbo); + } + GPU_vertbuf_discard(face_set_vbo); +} + constexpr MeshExtract create_extractor_sculpt_data() { MeshExtract extractor = {nullptr}; extractor.init = extract_sculpt_data_init; + extractor.init_subdiv = extract_sculpt_data_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc index 5ac30dd3be9..33c27b45627 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc @@ -21,6 +21,7 @@ * \ingroup draw */ +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -196,12 +197,104 @@ static void extract_vert_idx_iter_lvert_mesh(const MeshRenderData *mr, (*(uint32_t **)data)[offset + lvert_index] = v_orig; } +static void extract_vert_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + /* Each element points to an element in the ibo.points. 
*/ + draw_subdiv_init_origindex_buffer(vbo, + subdiv_cache->subdiv_loop_subdiv_vert_index, + subdiv_cache->num_subdiv_loops, + mr->loop_loose_len); +} + +static void extract_vert_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo); + const Mesh *coarse_mesh = subdiv_cache->mesh; + const MEdge *coarse_edges = coarse_mesh->medge; + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i = 0; i < loose_geom->edge_len; i++) { + const MEdge *loose_edge = &coarse_edges[loose_geom->edges[i]]; + vert_idx_data[offset] = loose_edge->v1; + vert_idx_data[offset + 1] = loose_edge->v2; + offset += 2; + } + + for (int i = 0; i < loose_geom->vert_len; i++) { + vert_idx_data[offset] = loose_geom->verts[i]; + offset += 1; + } +} + +static void extract_edge_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *mr, + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + draw_subdiv_init_origindex_buffer( + vbo, + static_cast<int *>(GPU_vertbuf_get_data(subdiv_cache->edges_orig_index)), + subdiv_cache->num_subdiv_loops, + mr->edge_loose_len * 2); +} + +static void extract_edge_idx_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + const MeshExtractLooseGeom *loose_geom, + void *buffer, + void *UNUSED(data)) +{ + const int loop_loose_len = loose_geom->edge_len + loose_geom->vert_len; + if (loop_loose_len == 0) { + return; + } + + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + uint *vert_idx_data = (uint *)GPU_vertbuf_get_data(vbo); + uint offset = subdiv_cache->num_subdiv_loops; + + for (int i 
= 0; i < loose_geom->edge_len; i++) { + vert_idx_data[offset] = loose_geom->edges[i]; + vert_idx_data[offset + 1] = loose_geom->edges[i]; + offset += 2; + } +} + +static void extract_poly_idx_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + MeshBatchCache *UNUSED(cache), + void *buf, + void *UNUSED(data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + draw_subdiv_init_origindex_buffer( + vbo, subdiv_cache->subdiv_loop_poly_index, subdiv_cache->num_subdiv_loops, 0); +} + constexpr MeshExtract create_extractor_poly_idx() { MeshExtract extractor = {nullptr}; extractor.init = extract_select_idx_init; extractor.iter_poly_bm = extract_poly_idx_iter_poly_bm; extractor.iter_poly_mesh = extract_poly_idx_iter_poly_mesh; + extractor.init_subdiv = extract_poly_idx_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; @@ -217,6 +310,8 @@ constexpr MeshExtract create_extractor_edge_idx() extractor.iter_poly_mesh = extract_edge_idx_iter_poly_mesh; extractor.iter_ledge_bm = extract_edge_idx_iter_ledge_bm; extractor.iter_ledge_mesh = extract_edge_idx_iter_ledge_mesh; + extractor.init_subdiv = extract_edge_idx_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_edge_idx_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; @@ -234,6 +329,8 @@ constexpr MeshExtract create_extractor_vert_idx() extractor.iter_ledge_mesh = extract_vert_idx_iter_ledge_mesh; extractor.iter_lvert_bm = extract_vert_idx_iter_lvert_bm; extractor.iter_lvert_mesh = extract_vert_idx_iter_lvert_mesh; + extractor.init_subdiv = extract_vert_idx_init_subdiv; + extractor.iter_loose_geom_subdiv = extract_vert_idx_loose_geom_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(uint32_t *); extractor.use_threading = true; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index af279b08a59..6e9d8ef6926 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -23,6 +23,7 @@ #include "BLI_string.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -31,25 +32,27 @@ namespace blender::draw { /** \name Extract UV layers * \{ */ -static void extract_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data)) +/* Initialize the vertex format to be used for UVs. Return true if any UV layer is + * found, false otherwise. */ +static bool mesh_extract_uv_format_init(GPUVertFormat *format, + struct MeshBatchCache *cache, + CustomData *cd_ldata, + eMRExtractType extract_type, + uint32_t &r_uv_layers) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - GPU_vertformat_deinterleave(&format); + GPU_vertformat_deinterleave(format); - CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; uint32_t uv_layers = cache->cd_used.uv; /* HACK to fix T68857 */ - if (mr->extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { + if (extract_type == MR_EXTRACT_BMESH && cache->cd_used.edit_uv == 1) { int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); if (layer != -1) { uv_layers |= (1 << layer); } } + r_uv_layers = uv_layers; + for (int i = 0; i < MAX_MTFACE; i++) { if (uv_layers & (1 << i)) { char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; @@ -58,30 +61,47 @@ static void extract_uv_init(const MeshRenderData *mr, GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); /* UV layer name. 
*/ BLI_snprintf(attr_name, sizeof(attr_name), "u%s", attr_safe_name); - GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + GPU_vertformat_attr_add(format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT); /* Auto layer name. */ BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_alias_add(&format, attr_name); + GPU_vertformat_alias_add(format, attr_name); /* Active render layer name. */ if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "u"); + GPU_vertformat_alias_add(format, "u"); } /* Active display layer name. */ if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "au"); + GPU_vertformat_alias_add(format, "au"); /* Alias to `pos` for edit uvs. */ - GPU_vertformat_alias_add(&format, "pos"); + GPU_vertformat_alias_add(format, "pos"); } /* Stencil mask uv layer name. */ if (i == CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV)) { - GPU_vertformat_alias_add(&format, "mu"); + GPU_vertformat_alias_add(format, "mu"); } } } + if (format->attr_len == 0) { + GPU_vertformat_attr_add(format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + return false; + } + + return true; +} + +static void extract_uv_init(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(tls_data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat format = {0}; + + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; int v_len = mr->loop_len; - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + uint32_t uv_layers = cache->cd_used.uv; + if (!mesh_extract_uv_format_init(&format, cache, cd_ldata, mr->extract_type, uv_layers)) { /* VBO will not be used, only allocate minimum of memory. 
*/ v_len = 1; } @@ -116,10 +136,45 @@ static void extract_uv_init(const MeshRenderData *mr, } } +static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + Mesh *coarse_mesh = subdiv_cache->mesh; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + GPUVertFormat format = {0}; + + uint v_len = subdiv_cache->num_subdiv_loops; + uint uv_layers; + if (!mesh_extract_uv_format_init( + &format, cache, &coarse_mesh->ldata, MR_EXTRACT_MESH, uv_layers)) { + // TODO(kevindietrich): handle this more gracefully. + v_len = 1; + } + + GPU_vertbuf_init_build_on_device(vbo, &format, v_len); + + if (uv_layers == 0) { + return; + } + + /* Index of the UV layer in the compact buffer. Used UV layers are stored in a single buffer. */ + int pack_layer_index = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (uv_layers & (1 << i)) { + const int offset = (int)subdiv_cache->num_subdiv_loops * pack_layer_index++; + draw_subdiv_extract_uvs(subdiv_cache, vbo, i, offset); + } + } +} + constexpr MeshExtract create_extractor_uv() { MeshExtract extractor = {nullptr}; extractor.init = extract_uv_init; + extractor.init_subdiv = extract_uv_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc index f8878eb2617..ea7810bcf6b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc @@ -25,6 +25,7 @@ #include "BLI_string.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -33,17 +34,14 @@ namespace blender::draw { /** \name Extract VCol * \{ */ -static void extract_vcol_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - 
void *UNUSED(tls_data)) +/* Initialize the common vertex format for vcol for coarse and subdivided meshes. */ +static void init_vcol_format(GPUVertFormat *format, + const MeshBatchCache *cache, + CustomData *cd_ldata) { - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - GPU_vertformat_deinterleave(&format); + GPU_vertformat_deinterleave(format); - CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; - uint32_t vcol_layers = cache->cd_used.vcol; + const uint32_t vcol_layers = cache->cd_used.vcol; for (int i = 0; i < MAX_MCOL; i++) { if (vcol_layers & (1 << i)) { @@ -52,31 +50,56 @@ static void extract_vcol_init(const MeshRenderData *mr, GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); BLI_snprintf(attr_name, sizeof(attr_name), "c%s", attr_safe_name); - GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); if (i == CustomData_get_render_layer(cd_ldata, CD_MLOOPCOL)) { - GPU_vertformat_alias_add(&format, "c"); + GPU_vertformat_alias_add(format, "c"); } if (i == CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL)) { - GPU_vertformat_alias_add(&format, "ac"); + GPU_vertformat_alias_add(format, "ac"); } /* Gather number of auto layers. */ /* We only do `vcols` that are not overridden by `uvs`. */ if (CustomData_get_named_layer_index(cd_ldata, CD_MLOOPUV, layer_name) == -1) { BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_alias_add(&format, attr_name); + GPU_vertformat_alias_add(format, attr_name); } } } +} + +/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision + * case. 
*/ +static GPUVertFormat *get_coarse_vcol_format(void) +{ + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPU_vertformat_alias_add(&format, "c"); + GPU_vertformat_alias_add(&format, "ac"); + } + return &format; +} + +using gpuMeshVcol = struct gpuMeshVcol { + ushort r, g, b, a; +}; + +static void extract_vcol_init(const MeshRenderData *mr, + struct MeshBatchCache *cache, + void *buf, + void *UNUSED(tls_data)) +{ + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); + GPUVertFormat format = {0}; + CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata; + const uint32_t vcol_layers = cache->cd_used.vcol; + init_vcol_format(&format, cache, cd_ldata); GPU_vertbuf_init_with_format(vbo, &format); GPU_vertbuf_data_alloc(vbo, mr->loop_len); - using gpuMeshVcol = struct gpuMeshVcol { - ushort r, g, b, a; - }; - gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo); for (int i = 0; i < MAX_MCOL; i++) { @@ -111,10 +134,64 @@ static void extract_vcol_init(const MeshRenderData *mr, } } +static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); + Mesh *coarse_mesh = subdiv_cache->mesh; + + GPUVertFormat format = {0}; + init_vcol_format(&format, cache, &coarse_mesh->ldata); + + GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *src_data = GPU_vertbuf_calloc(); + /* Dynamic as we upload and interpolate layers one at a time. 
*/ + GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC); + + GPU_vertbuf_data_alloc(src_data, coarse_mesh->totloop); + + gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data); + + const CustomData *cd_ldata = &coarse_mesh->ldata; + + const uint vcol_layers = cache->cd_used.vcol; + + /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in + * a single buffer. */ + int pack_layer_index = 0; + for (int i = 0; i < MAX_MTFACE; i++) { + if (vcol_layers & (1 << i)) { + /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */ + const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++; + const MLoopCol *mloopcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i); + + gpuMeshVcol *vcol = mesh_vcol; + + for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, mloopcol++) { + vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]); + vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]); + vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]); + vcol->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f)); + } + + /* Ensure data is uploaded properly. 
*/ + GPU_vertbuf_tag_dirty(src_data); + draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset); + } + } + + GPU_vertbuf_discard(src_data); +} + constexpr MeshExtract create_extractor_vcol() { MeshExtract extractor = {nullptr}; extractor.init = extract_vcol_init; + extractor.init_subdiv = extract_vcol_init_subdiv; extractor.data_type = MR_DATA_NONE; extractor.data_size = 0; extractor.use_threading = false; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc index bdb1410a755..bb8853b8154 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc @@ -25,6 +25,7 @@ #include "BKE_deform.h" +#include "draw_subdivision.h" #include "extract_mesh.h" namespace blender::draw { @@ -167,10 +168,57 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr, } } +static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, + const MeshRenderData *UNUSED(mr), + struct MeshBatchCache *cache, + void *buffer, + void *UNUSED(data)) +{ + Mesh *coarse_mesh = subdiv_cache->mesh; + GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buffer); + + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "weight", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + GPU_vertbuf_init_build_on_device(vbo, &format, subdiv_cache->num_subdiv_loops); + + GPUVertBuf *coarse_weights = GPU_vertbuf_calloc(); + GPU_vertbuf_init_with_format(coarse_weights, &format); + GPU_vertbuf_data_alloc(coarse_weights, coarse_mesh->totloop); + float *coarse_weights_data = static_cast<float *>(GPU_vertbuf_get_data(coarse_weights)); + + const DRW_MeshWeightState *wstate = &cache->weight_state; + const MDeformVert *dverts = static_cast<const MDeformVert *>( + CustomData_get_layer(&coarse_mesh->vdata, CD_MDEFORMVERT)); + + for (int i = 0; i 
< coarse_mesh->totpoly; i++) { + const MPoly *mpoly = &coarse_mesh->mpoly[i]; + + for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; + loop_index++) { + const MLoop *ml = &coarse_mesh->mloop[loop_index]; + + if (dverts != nullptr) { + const MDeformVert *dvert = &dverts[ml->v]; + coarse_weights_data[loop_index] = evaluate_vertex_weight(dvert, wstate); + } + else { + coarse_weights_data[loop_index] = evaluate_vertex_weight(nullptr, wstate); + } + } + } + + draw_subdiv_interp_custom_data(subdiv_cache, coarse_weights, vbo, 1, 0); + + GPU_vertbuf_discard(coarse_weights); +} + constexpr MeshExtract create_extractor_weights() { MeshExtract extractor = {nullptr}; extractor.init = extract_weights_init; + extractor.init_subdiv = extract_weights_init_subdiv; extractor.iter_poly_bm = extract_weights_iter_poly_bm; extractor.iter_poly_mesh = extract_weights_iter_poly_mesh; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl new file mode 100644 index 00000000000..36c3970d9a0 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl @@ -0,0 +1,230 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly restrict buffer sourceBuffer +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint src_data[]; +#else + float src_data[]; +#endif +}; + +layout(std430, binding = 2) readonly restrict buffer facePTexOffset +{ + uint face_ptex_offset[]; +}; + +layout(std430, binding = 3) readonly restrict buffer patchCoords +{ + BlenderPatchCoord patch_coords[]; +}; + +layout(std430, binding = 4) readonly restrict buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; + +layout(std430, binding = 5) writeonly restrict buffer destBuffer +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint dst_data[]; +#else + float dst_data[]; +#endif +}; + +struct 
Vertex { + float vertex_data[DIMENSIONS]; +}; + +void clear(inout Vertex v) +{ + for (int i = 0; i < DIMENSIONS; i++) { + v.vertex_data[i] = 0.0; + } +} + +Vertex read_vertex(uint index) +{ + Vertex result; +#ifdef GPU_FETCH_U16_TO_FLOAT + uint base_index = index * 2; + if (DIMENSIONS == 4) { + uint xy = src_data[base_index]; + uint zw = src_data[base_index + 1]; + + float x = float((xy >> 16) & 0xffff) / 65535.0; + float y = float(xy & 0xffff) / 65535.0; + float z = float((zw >> 16) & 0xffff) / 65535.0; + float w = float(zw & 0xffff) / 65535.0; + + result.vertex_data[0] = x; + result.vertex_data[1] = y; + result.vertex_data[2] = z; + result.vertex_data[3] = w; + } + else { + /* This case is unsupported for now. */ + clear(result); + } +#else + uint base_index = index * DIMENSIONS; + for (int i = 0; i < DIMENSIONS; i++) { + result.vertex_data[i] = src_data[base_index + i]; + } +#endif + return result; +} + +void write_vertex(uint index, Vertex v) +{ +#ifdef GPU_FETCH_U16_TO_FLOAT + uint base_index = dst_offset + index * 2; + if (DIMENSIONS == 4) { + uint x = uint(v.vertex_data[0] * 65535.0); + uint y = uint(v.vertex_data[1] * 65535.0); + uint z = uint(v.vertex_data[2] * 65535.0); + uint w = uint(v.vertex_data[3] * 65535.0); + + uint xy = x << 16 | y; + uint zw = z << 16 | w; + + dst_data[base_index] = xy; + dst_data[base_index + 1] = zw; + } + else { + /* This case is unsupported for now. 
*/ + dst_data[base_index] = 0; + } +#else + uint base_index = dst_offset + index * DIMENSIONS; + for (int i = 0; i < DIMENSIONS; i++) { + dst_data[base_index + i] = v.vertex_data[i]; + } +#endif +} + +Vertex interp_vertex(Vertex v0, Vertex v1, Vertex v2, Vertex v3, vec2 uv) +{ + Vertex result; + for (int i = 0; i < DIMENSIONS; i++) { + float e = mix(v0.vertex_data[i], v1.vertex_data[i], uv.x); + float f = mix(v2.vertex_data[i], v3.vertex_data[i], uv.x); + result.vertex_data[i] = mix(e, f, uv.y); + } + return result; +} + +void add_with_weight(inout Vertex v0, Vertex v1, float weight) +{ + for (int i = 0; i < DIMENSIONS; i++) { + v0.vertex_data[i] += v1.vertex_data[i] * weight; + } +} + +Vertex average(Vertex v0, Vertex v1) +{ + Vertex result; + for (int i = 0; i < DIMENSIONS; i++) { + result.vertex_data[i] = (v0.vertex_data[i] + v1.vertex_data[i]) * 0.5; + } + return result; +} + +uint get_vertex_count(uint coarse_polygon) +{ + uint number_of_patches = face_ptex_offset[coarse_polygon + 1] - face_ptex_offset[coarse_polygon]; + if (number_of_patches == 1) { + /* If there is only one patch for the current coarse polygon, then it is a quad. */ + return 4; + } + /* Otherwise, the number of patches is the number of vertices. */ + return number_of_patches; +} + +uint get_polygon_corner_index(uint coarse_polygon, uint patch_index) +{ + uint patch_offset = face_ptex_offset[coarse_polygon]; + return patch_index - patch_offset; +} + +uint get_loop_start(uint coarse_polygon) +{ + return extra_coarse_face_data[coarse_polygon] & coarse_face_loopstart_mask; +} + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + /* Find which coarse polygon we came from. 
*/ + uint coarse_polygon = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count); + uint loop_start = get_loop_start(coarse_polygon); + + /* Find the number of vertices for the coarse polygon. */ + Vertex v0, v1, v2, v3; + clear(v0); + clear(v1); + clear(v2); + clear(v3); + + uint number_of_vertices = get_vertex_count(coarse_polygon); + if (number_of_vertices == 4) { + /* Interpolate the src data. */ + v0 = read_vertex(loop_start + 0); + v1 = read_vertex(loop_start + 1); + v2 = read_vertex(loop_start + 2); + v3 = read_vertex(loop_start + 3); + } + else { + /* Interpolate the src data for the center. */ + uint loop_end = loop_start + number_of_vertices - 1; + Vertex center_value; + clear(center_value); + + float weight = 1.0 / float(number_of_vertices); + + for (uint l = loop_start; l < loop_end; l++) { + add_with_weight(center_value, read_vertex(l), weight); + } + + /* Interpolate between the previous and next corner for the middle values for the edges. */ + uint patch_index = uint(patch_coords[start_loop_index].patch_index); + uint current_coarse_corner = get_polygon_corner_index(coarse_polygon, patch_index); + uint next_coarse_corner = (current_coarse_corner + 1) % number_of_vertices; + uint prev_coarse_corner = (current_coarse_corner + number_of_vertices - 1) % + number_of_vertices; + + v0 = read_vertex(loop_start); + v1 = average(v0, read_vertex(loop_start + next_coarse_corner)); + v3 = average(v0, read_vertex(loop_start + prev_coarse_corner)); + + /* Interpolate between the current value, and the ones for the center and mid-edges. */ + v2 = center_value; + } + + /* Do a linear interpolation of the data based on the UVs for each loop of this subdivided quad. 
+ */ + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + BlenderPatchCoord co = patch_coords[loop_index]; + vec2 uv = decode_uv(co.encoded_uv); + /* NOTE: v2 and v3 are reversed to stay consistent with the interpolation weight on the x-axis: + * + * v3 +-----+ v2 + * | | + * | | + * v0 +-----+ v1 + * + * otherwise, weight would be `1.0 - uv.x` for `v2 <-> v3`, but `uv.x` for `v0 <-> v1`. + */ + Vertex result = interp_vertex(v0, v1, v3, v2, uv); + write_vertex(loop_index, result); + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl new file mode 100644 index 00000000000..f11c0f6427e --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl @@ -0,0 +1,57 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputEdgeOrigIndex +{ + int input_origindex[]; +}; + +layout(std430, binding = 1) writeonly buffer outputLinesIndices +{ + uint output_lines[]; +}; + +#ifndef LINES_LOOSE +void emit_line(uint line_offset, uint start_loop_index, uint corner_index) +{ + uint vertex_index = start_loop_index + corner_index; + + if (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display) { + output_lines[line_offset + 0] = 0xffffffff; + output_lines[line_offset + 1] = 0xffffffff; + } + else { + /* Mod 4 so we loop back at the first vertex on the last loop index (3). */ + uint next_vertex_index = start_loop_index + (corner_index + 1) % 4; + + output_lines[line_offset + 0] = vertex_index; + output_lines[line_offset + 1] = next_vertex_index; + } +} +#endif + +void main() +{ + uint index = get_global_invocation_index(); + if (index >= total_dispatch_size) { + return; + } + +#ifdef LINES_LOOSE + /* In the loose lines case, we execute for each line, with two vertices per line. 
*/ + uint line_offset = edge_loose_offset + index * 2; + uint loop_index = num_subdiv_loops + index * 2; + output_lines[line_offset] = loop_index; + output_lines[line_offset + 1] = loop_index + 1; +#else + /* We execute for each quad, so the start index of the loop is quad_index * 4. */ + uint start_loop_index = index * 4; + /* We execute for each quad, so the start index of the line is quad_index * 8 (with 2 vertices + * per line). */ + uint start_line_index = index * 8; + + for (int i = 0; i < 4; i++) { + emit_line(start_line_index + i * 2, start_loop_index, i); + } +#endif +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl new file mode 100644 index 00000000000..3257ebdae17 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -0,0 +1,43 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +/* Generate triangles from subdivision quads indices. 
*/ + +layout(std430, binding = 1) writeonly buffer outputTriangles +{ + uint output_tris[]; +}; + +#ifndef SINGLE_MATERIAL +layout(std430, binding = 2) readonly buffer inputPolygonMatOffset +{ + int polygon_mat_offset[]; +}; +#endif + +void main() +{ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint loop_index = quad_index * 4; + +#ifdef SINGLE_MATERIAL + uint triangle_loop_index = quad_index * 6; +#else + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + int mat_offset = polygon_mat_offset[coarse_quad_index]; + + int triangle_loop_index = (int(quad_index) + mat_offset) * 6; +#endif + + output_tris[triangle_loop_index + 0] = loop_index + 0; + output_tris[triangle_loop_index + 1] = loop_index + 1; + output_tris[triangle_loop_index + 2] = loop_index + 2; + output_tris[triangle_loop_index + 3] = loop_index + 0; + output_tris[triangle_loop_index + 4] = loop_index + 2; + output_tris[triangle_loop_index + 5] = loop_index + 3; +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl new file mode 100644 index 00000000000..005561964b8 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl @@ -0,0 +1,176 @@ + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +/* Uniform block for #DRWSubivUboStorage. */ +layout(std140) uniform shader_data +{ + /* Offsets in the buffers data where the source and destination data start. */ + int src_offset; + int dst_offset; + + /* Parameters for the DRWPatchMap. */ + int min_patch_face; + int max_patch_face; + int max_depth; + int patches_are_triangular; + + /* Coarse topology information. */ + int coarse_poly_count; + uint edge_loose_offset; + + /* Subdiv topology information. */ + uint num_subdiv_loops; + + /* Subdivision settings. */ + bool optimal_display; + + /* Sculpt data. 
*/ + bool has_sculpt_mask; + + /* Masks for the extra coarse face data. */ + uint coarse_face_select_mask; + uint coarse_face_smooth_mask; + uint coarse_face_active_mask; + uint coarse_face_loopstart_mask; + + /* Total number of elements to process. */ + uint total_dispatch_size; +}; + +uint get_global_invocation_index() +{ + uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x; + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row; +} + +/* Structure for #CompressedPatchCoord. */ +struct BlenderPatchCoord { + int patch_index; + uint encoded_uv; +}; + +vec2 decode_uv(uint encoded_uv) +{ + float u = float((encoded_uv >> 16) & 0xFFFFu) / 65535.0; + float v = float(encoded_uv & 0xFFFFu) / 65535.0; + return vec2(u, v); +} + +/* This structure is a carbon copy of OpenSubDiv's PatchTable::PatchHandle. */ +struct PatchHandle { + int array_index; + int patch_index; + int vertex_index; +}; + +/* This structure is a carbon copy of OpenSubDiv's PatchCoord. */ +struct PatchCoord { + int array_index; + int patch_index; + int vertex_index; + float u; + float v; +}; + +/* This structure is a carbon copy of OpenSubDiv's PatchCoord.QuadNode. + * Each child is a bitfield. */ +struct QuadNode { + uvec4 child; +}; + +bool is_set(uint i) +{ + /* QuadNode.Child.isSet is the first bit of the bitfield. */ + return (i & 0x1u) != 0; +} + +bool is_leaf(uint i) +{ + /* QuadNode.Child.isLeaf is the second bit of the bitfield. */ + return (i & 0x2u) != 0; +} + +uint get_index(uint i) +{ + /* QuadNode.Child.index is made of the remaining bits. */ + return (i >> 2) & 0x3FFFFFFFu; +} + +/* Duplicate of #PosNorLoop from the mesh extract CPU code. + * We do not use a vec3 for the position as it will be padded to a vec4 which is incompatible with + * the format. */ +struct PosNorLoop { + float x, y, z; + /* TODO(kevindietrich) : figure how to compress properly as GLSL does not have char/short types, + * bit operations get tricky. 
*/ + float nx, ny, nz; + float flag; +}; + +vec3 get_vertex_pos(PosNorLoop vertex_data) +{ + return vec3(vertex_data.x, vertex_data.y, vertex_data.z); +} + +vec3 get_vertex_nor(PosNorLoop vertex_data) +{ + return vec3(vertex_data.nx, vertex_data.ny, vertex_data.nz); +} + +void set_vertex_pos(inout PosNorLoop vertex_data, vec3 pos) +{ + vertex_data.x = pos.x; + vertex_data.y = pos.y; + vertex_data.z = pos.z; +} + +void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor, uint flag) +{ + vertex_data.nx = nor.x; + vertex_data.ny = nor.y; + vertex_data.nz = nor.z; + vertex_data.flag = float(flag); +} + +/* Set the vertex normal but preserve the existing flag. This is for when we manually compute the + * vertex normals when we cannot use the limit surface, in which case the flag and the normal are + * set by two separate compute passes. */ +void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor) +{ + set_vertex_nor(vertex_data, nor, 0); +} + +#define ORIGINDEX_NONE -1 + +#ifdef SUBDIV_POLYGON_OFFSET +layout(std430, binding = 0) readonly buffer inputSubdivPolygonOffset +{ + uint subdiv_polygon_offset[]; +}; + +/* Given the index of the subdivision quad, return the index of the corresponding coarse polygon. + * This uses subdiv_polygon_offset and since it is a growing list of offsets, we can use binary + * search to locate the right index. 
*/ +uint coarse_polygon_index_from_subdiv_quad_index(uint subdiv_quad_index, uint coarse_poly_count) +{ + uint first = 0; + uint last = coarse_poly_count; + + while (first != last) { + uint middle = (first + last) / 2; + + if (subdiv_polygon_offset[middle] < subdiv_quad_index) { + first = middle + 1; + } + else { + last = middle; + } + } + + if (subdiv_polygon_offset[first] == subdiv_quad_index) { + return first; + } + + return first - 1; +} +#endif diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl new file mode 100644 index 00000000000..575090472b1 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl @@ -0,0 +1,56 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer faceAdjacencyOffsets +{ + uint face_adjacency_offsets[]; +}; + +layout(std430, binding = 2) readonly buffer faceAdjacencyLists +{ + uint face_adjacency_lists[]; +}; + +layout(std430, binding = 3) writeonly buffer vertexNormals +{ + vec3 normals[]; +}; + +void main() +{ + uint vertex_index = get_global_invocation_index(); + if (vertex_index >= total_dispatch_size) { + return; + } + + uint first_adjacent_face_offset = face_adjacency_offsets[vertex_index]; + uint number_of_adjacent_faces = face_adjacency_offsets[vertex_index + 1] - + first_adjacent_face_offset; + + vec3 accumulated_normal = vec3(0.0); + + /* For each adjacent face. */ + for (uint i = 0; i < number_of_adjacent_faces; i++) { + uint adjacent_face = face_adjacency_lists[first_adjacent_face_offset + i]; + uint start_loop_index = adjacent_face * 4; + + /* Compute face normal. 
*/ + vec3 adjacent_verts[3]; + for (uint j = 0; j < 3; j++) { + adjacent_verts[j] = get_vertex_pos(pos_nor[start_loop_index + j]); + } + + vec3 face_normal = normalize( + cross(adjacent_verts[1] - adjacent_verts[0], adjacent_verts[2] - adjacent_verts[0])); + accumulated_normal += face_normal; + } + + float weight = 1.0 / float(number_of_adjacent_faces); + vec3 normal = normalize(accumulated_normal); + normals[vertex_index] = normal; +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl new file mode 100644 index 00000000000..84cd65d4161 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl @@ -0,0 +1,34 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputNormals +{ + vec3 vertex_normals[]; +}; + +layout(std430, binding = 1) readonly buffer inputSubdivVertLoopMap +{ + uint vert_loop_map[]; +}; + +layout(std430, binding = 2) buffer outputPosNor +{ + PosNorLoop pos_nor[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (int i = 0; i < 4; i++) { + uint subdiv_vert_index = vert_loop_map[start_loop_index + i]; + vec3 nor = vertex_normals[subdiv_vert_index]; + set_vertex_nor(pos_nor[start_loop_index + i], nor); + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl new file mode 100644 index 00000000000..5dd7decf663 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl @@ -0,0 +1,416 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +/* Source buffer. 
*/ +layout(std430, binding = 0) buffer src_buffer +{ + float srcVertexBuffer[]; +}; + +/* #DRWPatchMap */ +layout(std430, binding = 1) readonly buffer inputPatchHandles +{ + PatchHandle input_patch_handles[]; +}; + +layout(std430, binding = 2) readonly buffer inputQuadNodes +{ + QuadNode quad_nodes[]; +}; + +layout(std430, binding = 3) readonly buffer inputPatchCoords +{ + BlenderPatchCoord patch_coords[]; +}; + +layout(std430, binding = 4) readonly buffer inputVertOrigIndices +{ + int input_vert_origindex[]; +}; + +/* Patch buffers. */ +layout(std430, binding = 5) buffer patchArray_buffer +{ + OsdPatchArray patchArrayBuffer[]; +}; + +layout(std430, binding = 6) buffer patchIndex_buffer +{ + int patchIndexBuffer[]; +}; + +layout(std430, binding = 7) buffer patchParam_buffer +{ + OsdPatchParam patchParamBuffer[]; +}; + + /* Output buffer(s). */ + +#if defined(FVAR_EVALUATION) +layout(std430, binding = 8) writeonly buffer outputFVarData +{ + vec2 output_fvar[]; +}; +#elif defined(FDOTS_EVALUATION) +/* For face dots, we build the position, normals, and index buffers in one go. */ + +/* vec3 is padded to vec4, but the format used for fdots does not have any padding. */ +struct FDotVert { + float x, y, z; +}; + +/* Same here, do not use vec3. 
*/ +struct FDotNor { + float x, y, z; + float flag; +}; + +layout(std430, binding = 8) writeonly buffer outputVertices +{ + FDotVert output_verts[]; +}; + +layout(std430, binding = 9) writeonly buffer outputNormals +{ + FDotNor output_nors[]; +}; + +layout(std430, binding = 10) writeonly buffer outputFdotsIndices +{ + uint output_indices[]; +}; + +layout(std430, binding = 11) readonly buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; +#else +layout(std430, binding = 8) writeonly buffer outputVertexData +{ + PosNorLoop output_verts[]; +}; +#endif + +vec2 read_vec2(int index) +{ + vec2 result; + result.x = srcVertexBuffer[index * 2]; + result.y = srcVertexBuffer[index * 2 + 1]; + return result; +} + +vec3 read_vec3(int index) +{ + vec3 result; + result.x = srcVertexBuffer[index * 3]; + result.y = srcVertexBuffer[index * 3 + 1]; + result.z = srcVertexBuffer[index * 3 + 2]; + return result; +} + +OsdPatchArray GetPatchArray(int arrayIndex) +{ + return patchArrayBuffer[arrayIndex]; +} + +OsdPatchParam GetPatchParam(int patchIndex) +{ + return patchParamBuffer[patchIndex]; +} + +/* ------------------------------------------------------------------------------ + * Patch Coordinate lookup. Return an OsdPatchCoord for the given patch_index and uvs. + * This code is a port of the OpenSubdiv PatchMap lookup code. + */ + +PatchHandle bogus_patch_handle() +{ + PatchHandle ret; + ret.array_index = -1; + ret.vertex_index = -1; + ret.patch_index = -1; + return ret; +} + +int transformUVToQuadQuadrant(float median, inout float u, inout float v) +{ + int uHalf = (u >= median) ? 1 : 0; + if (uHalf != 0) + u -= median; + + int vHalf = (v >= median) ? 
1 : 0; + if (vHalf != 0) + v -= median; + + return (vHalf << 1) | uHalf; +} + +int transformUVToTriQuadrant(float median, inout float u, inout float v, inout bool rotated) +{ + + if (!rotated) { + if (u >= median) { + u -= median; + return 1; + } + if (v >= median) { + v -= median; + return 2; + } + if ((u + v) >= median) { + rotated = true; + return 3; + } + return 0; + } + else { + if (u < median) { + v -= median; + return 1; + } + if (v < median) { + u -= median; + return 2; + } + u -= median; + v -= median; + if ((u + v) < median) { + rotated = false; + return 3; + } + return 0; + } +} + +PatchHandle find_patch(int face_index, float u, float v) +{ + if (face_index < min_patch_face || face_index > max_patch_face) { + return bogus_patch_handle(); + } + + QuadNode node = quad_nodes[face_index - min_patch_face]; + + if (!is_set(node.child[0])) { + return bogus_patch_handle(); + } + + float median = 0.5; + bool tri_rotated = false; + + for (int depth = 0; depth <= max_depth; ++depth, median *= 0.5) { + int quadrant = (patches_are_triangular != 0) ? 
+ transformUVToTriQuadrant(median, u, v, tri_rotated) : + transformUVToQuadQuadrant(median, u, v); + + if (is_leaf(node.child[quadrant])) { + return input_patch_handles[get_index(node.child[quadrant])]; + } + + node = quad_nodes[get_index(node.child[quadrant])]; + } +} + +OsdPatchCoord bogus_patch_coord(int face_index, float u, float v) +{ + OsdPatchCoord coord; + coord.arrayIndex = 0; + coord.patchIndex = face_index; + coord.vertIndex = 0; + coord.s = u; + coord.t = v; + return coord; +} + +OsdPatchCoord GetPatchCoord(int face_index, float u, float v) +{ + PatchHandle patch_handle = find_patch(face_index, u, v); + + if (patch_handle.array_index == -1) { + return bogus_patch_coord(face_index, u, v); + } + + OsdPatchCoord coord; + coord.arrayIndex = patch_handle.array_index; + coord.patchIndex = patch_handle.patch_index; + coord.vertIndex = patch_handle.vertex_index; + coord.s = u; + coord.t = v; + return coord; +} + +/* ------------------------------------------------------------------------------ + * Patch evaluation. Note that the 1st and 2nd derivatives are always computed, although we + * only return and use the 1st derivatives if adaptive patches are used. This could + * perhaps be optimized. + */ + +#if defined(FVAR_EVALUATION) +void evaluate_patches_limits(int patch_index, float u, float v, inout vec2 dst) +{ + OsdPatchCoord coord = GetPatchCoord(patch_index, u, v); + OsdPatchArray array = GetPatchArray(coord.arrayIndex); + OsdPatchParam param = GetPatchParam(coord.patchIndex); + + int patchType = OsdPatchParamIsRegular(param) ? 
array.regDesc : array.desc; + + float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20]; + int nPoints = OsdEvaluatePatchBasis( + patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv); + + int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase); + + for (int cv = 0; cv < nPoints; ++cv) { + int index = patchIndexBuffer[indexBase + cv]; + vec2 src_fvar = read_vec2(src_offset + index); + dst += src_fvar * wP[cv]; + } +} +#else +void evaluate_patches_limits( + int patch_index, float u, float v, inout vec3 dst, inout vec3 du, inout vec3 dv) +{ + OsdPatchCoord coord = GetPatchCoord(patch_index, u, v); + OsdPatchArray array = GetPatchArray(coord.arrayIndex); + OsdPatchParam param = GetPatchParam(coord.patchIndex); + + int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc; + + float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20]; + int nPoints = OsdEvaluatePatchBasis( + patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv); + + int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase); + + for (int cv = 0; cv < nPoints; ++cv) { + int index = patchIndexBuffer[indexBase + cv]; + vec3 src_vertex = read_vec3(index); + + dst += src_vertex * wP[cv]; + du += src_vertex * wDu[cv]; + dv += src_vertex * wDv[cv]; + } +} +#endif + +/* ------------------------------------------------------------------------------ + * Entry point. + */ + +#if defined(FVAR_EVALUATION) +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + vec2 fvar = vec2(0.0); + + BlenderPatchCoord patch_co = patch_coords[loop_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, fvar); + output_fvar[dst_offset + loop_index] = fvar; + } +} +#elif defined(FDOTS_EVALUATION) +bool is_face_selected(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0; +} + +bool is_face_active(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_active_mask) != 0; +} + +float get_face_flag(uint coarse_quad_index) +{ + if (is_face_active(coarse_quad_index)) { + return -1.0; + } + + if (is_face_selected(coarse_quad_index)) { + return 1.0; + } + + return 0.0; +} + +void main() +{ + /* We execute for each coarse quad. */ + uint coarse_quad_index = get_global_invocation_index(); + if (coarse_quad_index >= total_dispatch_size) { + return; + } + + BlenderPatchCoord patch_co = patch_coords[coarse_quad_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + vec3 pos = vec3(0.0); + vec3 du = vec3(0.0); + vec3 dv = vec3(0.0); + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv); + vec3 nor = normalize(cross(du, dv)); + + FDotVert vert; + vert.x = pos.x; + vert.y = pos.y; + vert.z = pos.z; + + FDotNor fnor; + fnor.x = nor.x; + fnor.y = nor.y; + fnor.z = nor.z; + fnor.flag = get_face_flag(coarse_quad_index); + + output_verts[coarse_quad_index] = vert; + output_nors[coarse_quad_index] = fnor; + output_indices[coarse_quad_index] = coarse_quad_index; +} +#else +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + vec3 pos = vec3(0.0); + vec3 du = vec3(0.0); + vec3 dv = vec3(0.0); + + BlenderPatchCoord patch_co = patch_coords[loop_index]; + vec2 uv = decode_uv(patch_co.encoded_uv); + + evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv); + +# if defined(LIMIT_NORMALS) + vec3 nor = normalize(cross(du, dv)); +# else + /* This will be computed later. */ + vec3 nor = vec3(0.0); +# endif + + int origindex = input_vert_origindex[loop_index]; + uint flag = 0; + if (origindex == -1) { + flag = -1; + } + + PosNorLoop vertex_data; + set_vertex_pos(vertex_data, pos); + set_vertex_nor(vertex_data, nor, flag); + output_verts[loop_index] = vertex_data; + } +} +#endif diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl new file mode 100644 index 00000000000..6c76cd41ca4 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl @@ -0,0 +1,97 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer inputEdgeIndex +{ + uint input_edge_index[]; +}; + +layout(std430, binding = 2) writeonly buffer outputEdgeFactors +{ +#ifdef GPU_AMD_DRIVER_BYTE_BUG + float output_edge_fac[]; +#else + uint output_edge_fac[]; +#endif +}; + +void write_vec4(uint index, vec4 edge_facs) +{ +#ifdef GPU_AMD_DRIVER_BYTE_BUG + for (uint i = 0; i < 4; i++) { + output_edge_fac[index + i] = edge_facs[i]; + } +#else + /* Use same scaling as in extract_edge_fac_iter_poly_mesh. 
 uint a = uint(clamp(edge_facs.x * 253.0 + 1.0, 0.0, 255.0)); + uint b = uint(clamp(edge_facs.y * 253.0 + 1.0, 0.0, 255.0)); + uint c = uint(clamp(edge_facs.z * 253.0 + 1.0, 0.0, 255.0)); + uint d = uint(clamp(edge_facs.w * 253.0 + 1.0, 0.0, 255.0)); + uint packed_edge_fac = a << 24 | b << 16 | c << 8 | d; + output_edge_fac[index] = packed_edge_fac; +#endif +} + +/* From extract_mesh_vbo_edge_fac.cc, keep in sync! */ +float loop_edge_factor_get(vec3 f_no, vec3 v_co, vec3 v_no, vec3 v_next_co) +{ + vec3 evec = v_next_co - v_co; + vec3 enor = normalize(cross(v_no, evec)); + float d = abs(dot(enor, f_no)); + /* Re-scale to the slider range. */ + d *= (1.0 / 0.065); + return clamp(d, 0.0, 1.0); +} + +float compute_line_factor(uint start_loop_index, uint corner_index, vec3 face_normal) +{ + uint vertex_index = start_loop_index + corner_index; + uint edge_index = input_edge_index[vertex_index]; + + if (edge_index == -1 && optimal_display) { + return 0.0; + } + + /* Mod 4 so we loop back at the first vertex on the last loop index (3), but only the corner + * index needs to be wrapped. */ + uint next_vertex_index = start_loop_index + (corner_index + 1) % 4; + vec3 vertex_pos = get_vertex_pos(pos_nor[vertex_index]); + vec3 vertex_nor = get_vertex_nor(pos_nor[vertex_index]); + vec3 next_vertex_pos = get_vertex_pos(pos_nor[next_vertex_index]); + return loop_edge_factor_get(face_normal, vertex_pos, vertex_nor, next_vertex_pos); +} + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. */ + uint start_loop_index = quad_index * 4; + + /* First compute the face normal, we need it to compute the dihedral edge angle. 
*/ + vec3 v0 = get_vertex_pos(pos_nor[start_loop_index + 0]); + vec3 v1 = get_vertex_pos(pos_nor[start_loop_index + 1]); + vec3 v2 = get_vertex_pos(pos_nor[start_loop_index + 2]); + vec3 face_normal = normalize(cross(v1 - v0, v2 - v0)); + + vec4 edge_facs = vec4(0.0); + for (int i = 0; i < 4; i++) { + edge_facs[i] = compute_line_factor(start_loop_index, i, face_normal); + } + +#ifdef GPU_AMD_DRIVER_BYTE_BUG + write_vec4(start_loop_index, edge_facs); +#else + /* When packed into bytes, the index is the same as for the quad. */ + write_vec4(quad_index, edge_facs); +#endif +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl new file mode 100644 index 00000000000..ea73b9482d3 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl @@ -0,0 +1,80 @@ + +/* To be compile with common_subdiv_lib.glsl */ + +layout(std430, binding = 0) readonly buffer inputVerts +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 1) readonly buffer inputUVs +{ + vec2 uvs[]; +}; + +/* Mirror of #UVStretchAngle in the C++ code, but using floats until proper data compression + * is implemented for all subdivision data. */ +struct UVStretchAngle { + float angle; + float uv_angle0; + float uv_angle1; +}; + +layout(std430, binding = 2) writeonly buffer outputStretchAngles +{ + UVStretchAngle uv_stretches[]; +}; + +#define M_PI 3.1415926535897932 +#define M_1_PI 0.31830988618379067154 + +/* Adapted from BLI_math_vector.h */ +float angle_normalized_v3v3(vec3 v1, vec3 v2) +{ + /* this is the same as acos(dot_v3v3(v1, v2)), but more accurate */ + bool q = (dot(v1, v2) >= 0.0); + vec3 v = (q) ? (v1 - v2) : (v1 + v2); + float a = 2.0 * asin(length(v) / 2.0); + return (q) ? a : M_PI - a; +} + +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint i = 0; i < 4; i++) { + uint cur_loop_index = start_loop_index + i; + uint next_loop_index = start_loop_index + (i + 1) % 4; + uint prev_loop_index = start_loop_index + (i + 3) % 4; + + /* Compute 2d edge vectors from UVs. */ + vec2 cur_uv = uvs[src_offset + cur_loop_index]; + vec2 next_uv = uvs[src_offset + next_loop_index]; + vec2 prev_uv = uvs[src_offset + prev_loop_index]; + + vec2 norm_uv_edge0 = normalize(prev_uv - cur_uv); + vec2 norm_uv_edge1 = normalize(cur_uv - next_uv); + + /* Compute 3d edge vectors from positions. */ + vec3 cur_pos = get_vertex_pos(pos_nor[cur_loop_index]); + vec3 next_pos = get_vertex_pos(pos_nor[next_loop_index]); + vec3 prev_pos = get_vertex_pos(pos_nor[prev_loop_index]); + + vec3 norm_pos_edge0 = normalize(prev_pos - cur_pos); + vec3 norm_pos_edge1 = normalize(cur_pos - next_pos); + + /* Compute stretches, this logic is adapted from #edituv_get_edituv_stretch_angle. + * Keep in sync! 
*/ + UVStretchAngle stretch; + stretch.uv_angle0 = atan(norm_uv_edge0.y, norm_uv_edge0.x) * M_1_PI; + stretch.uv_angle1 = atan(norm_uv_edge1.y, norm_uv_edge1.x) * M_1_PI; + stretch.angle = angle_normalized_v3v3(norm_pos_edge0, norm_pos_edge1) * M_1_PI; + + uv_stretches[cur_loop_index] = stretch; + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl new file mode 100644 index 00000000000..e897fb3f3c0 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl @@ -0,0 +1,31 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly buffer inputCoarseData +{ + float coarse_stretch_area[]; +}; + +layout(std430, binding = 2) writeonly buffer outputSubdivData +{ + float subdiv_stretch_area[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. 
*/ + uint start_loop_index = quad_index * 4; + + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + + for (int i = 0; i < 4; i++) { + subdiv_stretch_area[start_loop_index + i] = coarse_stretch_area[coarse_quad_index]; + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl new file mode 100644 index 00000000000..41a8df3cf82 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl @@ -0,0 +1,52 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +layout(std430, binding = 1) readonly buffer inputVertexData +{ + PosNorLoop pos_nor[]; +}; + +layout(std430, binding = 2) readonly buffer extraCoarseFaceData +{ + uint extra_coarse_face_data[]; +}; + +layout(std430, binding = 3) writeonly buffer outputLoopNormals +{ + vec3 output_lnor[]; +}; + +void main() +{ + /* We execute for each quad. */ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + /* The start index of the loop is quad_index * 4. */ + uint start_loop_index = quad_index * 4; + + uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, + coarse_poly_count); + + if ((extra_coarse_face_data[coarse_quad_index] & coarse_face_smooth_mask) != 0) { + /* Face is smooth, use vertex normals. */ + for (int i = 0; i < 4; i++) { + PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i]; + output_lnor[start_loop_index + i] = get_vertex_nor(pos_nor_loop); + } + } + else { + /* Face is flat shaded, compute flat face normal from an inscribed triangle. 
*/ + vec3 verts[3]; + for (int i = 0; i < 3; i++) { + verts[i] = get_vertex_pos(pos_nor[start_loop_index + i]); + } + + vec3 face_normal = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + for (int i = 0; i < 4; i++) { + output_lnor[start_loop_index + i] = face_normal; + } + } +} diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl new file mode 100644 index 00000000000..7182ce57ad3 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl @@ -0,0 +1,47 @@ + +/* To be compiled with common_subdiv_lib.glsl */ + +struct SculptData { + uint face_set_color; + float mask; +}; + +layout(std430, binding = 0) readonly restrict buffer sculptMask +{ + float sculpt_mask[]; +}; + +layout(std430, binding = 1) readonly restrict buffer faceSetColor +{ + uint face_set_color[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer sculptData +{ + SculptData sculpt_data[]; +}; + +void main() +{ + /* We execute for each quad. 
*/ + uint quad_index = get_global_invocation_index(); + if (quad_index >= total_dispatch_size) { + return; + } + + uint start_loop_index = quad_index * 4; + + for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) { + SculptData data; + data.face_set_color = face_set_color[loop_index]; + + if (has_sculpt_mask) { + data.mask = sculpt_mask[loop_index]; + } + else { + data.mask = 0.0; + } + + sculpt_data[loop_index] = data; + } +} diff --git a/source/blender/editors/space_view3d/view3d_draw.c b/source/blender/editors/space_view3d/view3d_draw.c index a7d170982ed..b1f19581543 100644 --- a/source/blender/editors/space_view3d/view3d_draw.c +++ b/source/blender/editors/space_view3d/view3d_draw.c @@ -1581,6 +1581,7 @@ void view3d_main_region_draw(const bContext *C, ARegion *region) view3d_draw_view(C, region); + DRW_cache_free_old_subdiv(); DRW_cache_free_old_batches(bmain); BKE_image_free_old_gputextures(bmain); GPU_pass_cache_garbage_collect(); diff --git a/source/blender/editors/transform/transform_snap_object.c b/source/blender/editors/transform/transform_snap_object.c index 350d3a2676c..e3a2d1f6531 100644 --- a/source/blender/editors/transform/transform_snap_object.c +++ b/source/blender/editors/transform/transform_snap_object.c @@ -146,7 +146,7 @@ struct SnapObjectContext { * If NULL the BMesh should be used. 
*/ static Mesh *mesh_for_snap(Object *ob_eval, eSnapEditType edit_mode_type, bool *r_use_hide) { - Mesh *me_eval = ob_eval->data; + Mesh *me_eval = BKE_object_get_evaluated_mesh(ob_eval); bool use_hide = false; if (BKE_object_is_in_editmode(ob_eval)) { if (edit_mode_type == SNAP_GEOM_EDIT) { diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index 5189fa1ae41..5e67441be27 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -40,6 +40,8 @@ typedef enum eGPUBackendType { void GPU_backend_init(eGPUBackendType backend); void GPU_backend_exit(void); +eGPUBackendType GPU_backend_get_type(void); + /** Opaque type hiding blender::gpu::Context. */ typedef struct GPUContext GPUContext; diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h index e4f1709173e..0f83e590597 100644 --- a/source/blender/gpu/GPU_index_buffer.h +++ b/source/blender/gpu/GPU_index_buffer.h @@ -53,6 +53,8 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *, GPUPrimType, uint index_len, uin void GPU_indexbuf_init(GPUIndexBufBuilder *, GPUPrimType, uint prim_len, uint vertex_len); GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len); +void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len); + /* * Thread safe. * @@ -82,6 +84,16 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *); void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding); +/* Upload data to the GPU (if not built on the device) and bind the buffer to its default target. + */ +void GPU_indexbuf_use(GPUIndexBuf *elem); + +/* Partially update the GPUIndexBuf which was already sent to the device, or built directly on the + * device. The data needs to be compatible with potential compression applied to the original + * indices when the index buffer was built, i.e., if the data was compressed to use shorts instead + * of ints, shorts should passed here. 
*/ +void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, uint len, const void *data); + /* Create a sub-range of an existing index-buffer. */ GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *elem_src, uint start, uint length); void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index 62a495abfb3..43a8e7fc4cb 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -91,6 +91,8 @@ void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts); void GPU_vertbuf_init_with_format_ex(GPUVertBuf *, const GPUVertFormat *, GPUUsageType); +void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len); + #define GPU_vertbuf_init_with_format(verts, format) \ GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_STATIC) @@ -172,6 +174,7 @@ const GPUVertFormat *GPU_vertbuf_get_format(const GPUVertBuf *verts); uint GPU_vertbuf_get_vertex_alloc(const GPUVertBuf *verts); uint GPU_vertbuf_get_vertex_len(const GPUVertBuf *verts); GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts); +void GPU_vertbuf_tag_dirty(GPUVertBuf *verts); /** * Should be rename to #GPU_vertbuf_data_upload. @@ -179,12 +182,14 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts); void GPU_vertbuf_use(GPUVertBuf *); void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding); +void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle); + /** * XXX: do not use! * This is just a wrapper for the use of the Hair refine workaround. * To be used with #GPU_vertbuf_use(). 
*/ -void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data); +void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data); /* Metrics */ uint GPU_vertbuf_get_memory_usage(void); diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index 5af15d1bc3d..98714269402 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -186,6 +186,15 @@ void GPU_backend_exit() g_backend = nullptr; } +eGPUBackendType GPU_backend_get_type() +{ + if (g_backend && dynamic_cast<GLBackend *>(g_backend) != nullptr) { + return GPU_BACKEND_OPENGL; + } + + return GPU_BACKEND_NONE; +} + GPUBackend *GPUBackend::get() { return g_backend; diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc index 3472cc24a74..895b2a8461b 100644 --- a/source/blender/gpu/intern/gpu_index_buffer.cc +++ b/source/blender/gpu/intern/gpu_index_buffer.cc @@ -74,11 +74,16 @@ void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len) { GPUIndexBuf *elem_ = GPU_indexbuf_calloc(); - IndexBuf *elem = unwrap(elem_); - elem->init_build_on_device(index_len); + GPU_indexbuf_init_build_on_device(elem_, index_len); return elem_; } +void GPU_indexbuf_init_build_on_device(GPUIndexBuf *elem, uint index_len) +{ + IndexBuf *elem_ = unwrap(elem); + elem_->init_build_on_device(index_len); +} + void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from) { BLI_assert(builder_to->data == builder_from->data); @@ -410,9 +415,19 @@ int GPU_indexbuf_primitive_len(GPUPrimType prim_type) return indices_per_primitive(prim_type); } +void GPU_indexbuf_use(GPUIndexBuf *elem) +{ + unwrap(elem)->upload_data(); +} + void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding) { unwrap(elem)->bind_as_ssbo(binding); } +void GPU_indexbuf_update_sub(GPUIndexBuf *elem, uint start, 
uint len, const void *data) +{ + unwrap(elem)->update_sub(start, len, data); +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh index ed7dd830c8c..adc0145f867 100644 --- a/source/blender/gpu/intern/gpu_index_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_index_buffer_private.hh @@ -92,11 +92,15 @@ class IndexBuf { return is_init_; }; + virtual void upload_data(void) = 0; + virtual void bind_as_ssbo(uint binding) = 0; virtual const uint32_t *read() const = 0; uint32_t *unmap(const uint32_t *mapped_memory) const; + virtual void update_sub(uint start, uint len, const void *data) = 0; + private: inline void squeeze_indices_short(uint min_idx, uint max_idx); inline uint index_range(uint *r_min, uint *r_max); diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 5ed9648387f..dba31f501f2 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -144,6 +144,12 @@ void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts_, unwrap(verts_)->init(format, usage); } +void GPU_vertbuf_init_build_on_device(GPUVertBuf *verts, GPUVertFormat *format, uint v_len) +{ + GPU_vertbuf_init_with_format_ex(verts, format, GPU_USAGE_DEVICE_ONLY); + GPU_vertbuf_data_alloc(verts, v_len); +} + GPUVertBuf *GPU_vertbuf_duplicate(GPUVertBuf *verts_) { return wrap(unwrap(verts_)->duplicate()); @@ -313,6 +319,11 @@ GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts) return unwrap(verts)->flag; } +void GPU_vertbuf_tag_dirty(GPUVertBuf *verts) +{ + unwrap(verts)->flag |= GPU_VERTBUF_DATA_DIRTY; +} + uint GPU_vertbuf_get_memory_usage() { return VertBuf::memory_usage; @@ -323,12 +334,17 @@ void GPU_vertbuf_use(GPUVertBuf *verts) unwrap(verts)->upload(); } +void GPU_vertbuf_wrap_handle(GPUVertBuf *verts, uint64_t handle) +{ + unwrap(verts)->wrap_handle(handle); +} + void 
GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding) { unwrap(verts)->bind_as_ssbo(binding); } -void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data) +void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, const void *data) { unwrap(verts)->update_sub(start, len, data); } diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 9531c2c1a5f..2f46295f45a 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -68,6 +68,8 @@ class VertBuf { void upload(void); virtual void bind_as_ssbo(uint binding) = 0; + virtual void wrap_handle(uint64_t handle) = 0; + VertBuf *duplicate(void); /* Size of the data allocated. */ @@ -96,7 +98,7 @@ class VertBuf { } } - virtual void update_sub(uint start, uint len, void *data) = 0; + virtual void update_sub(uint start, uint len, const void *data) = 0; virtual const void *read() const = 0; virtual void *unmap(const void *mapped_data) const = 0; diff --git a/source/blender/gpu/opengl/gl_index_buffer.cc b/source/blender/gpu/opengl/gl_index_buffer.cc index e305f765ad9..82bab460ae3 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.cc +++ b/source/blender/gpu/opengl/gl_index_buffer.cc @@ -81,4 +81,14 @@ bool GLIndexBuf::is_active() const return ibo_id_ == active_ibo_id; } +void GLIndexBuf::upload_data() +{ + bind(); +} + +void GLIndexBuf::update_sub(uint start, uint len, const void *data) +{ + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, start, len, data); +} + } // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_index_buffer.hh b/source/blender/gpu/opengl/gl_index_buffer.hh index 0dbdaa6d398..85d52447bc6 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.hh +++ b/source/blender/gpu/opengl/gl_index_buffer.hh @@ -61,6 +61,10 @@ class GLIndexBuf : public IndexBuf { return (index_type_ == GPU_INDEX_U16) ? 
0xFFFFu : 0xFFFFFFFFu; } + void upload_data(void) override; + + void update_sub(uint start, uint len, const void *data) override; + private: bool is_active() const; diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.cc b/source/blender/gpu/opengl/gl_vertex_buffer.cc index ce16a491528..469ac2cf8d6 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.cc +++ b/source/blender/gpu/opengl/gl_vertex_buffer.cc @@ -49,6 +49,10 @@ void GLVertBuf::resize_data() void GLVertBuf::release_data() { + if (is_wrapper_) { + return; + } + if (vbo_id_ != 0) { GLContext::buf_free(vbo_id_); vbo_id_ = 0; @@ -137,6 +141,16 @@ void *GLVertBuf::unmap(const void *mapped_data) const return result; } +void GLVertBuf::wrap_handle(uint64_t handle) +{ + BLI_assert(vbo_id_ == 0); + BLI_assert(glIsBuffer(static_cast<uint>(handle))); + is_wrapper_ = true; + vbo_id_ = static_cast<uint>(handle); + /* We assume the data is already on the device, so no need to allocate or send it. */ + flag = GPU_VERTBUF_DATA_UPLOADED; +} + bool GLVertBuf::is_active() const { if (!vbo_id_) { @@ -147,7 +161,7 @@ bool GLVertBuf::is_active() const return vbo_id_ == active_vbo_id; } -void GLVertBuf::update_sub(uint start, uint len, void *data) +void GLVertBuf::update_sub(uint start, uint len, const void *data) { glBufferSubData(GL_ARRAY_BUFFER, start, len, data); } diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.hh b/source/blender/gpu/opengl/gl_vertex_buffer.hh index 6c38a2225b3..27e4cc4f8e2 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.hh +++ b/source/blender/gpu/opengl/gl_vertex_buffer.hh @@ -39,17 +39,22 @@ class GLVertBuf : public VertBuf { private: /** OpenGL buffer handle. Init on first upload. Immutable after that. */ GLuint vbo_id_ = 0; + /** Defines whether the buffer handle is wrapped by this GLVertBuf, i.e. we do not own it and + * should not free it. */ + bool is_wrapper_ = false; /** Size on the GPU. 
*/ size_t vbo_size_ = 0; public: void bind(void); - void update_sub(uint start, uint len, void *data) override; + void update_sub(uint start, uint len, const void *data) override; const void *read() const override; void *unmap(const void *mapped_data) const override; + void wrap_handle(uint64_t handle) override; + protected: void acquire_data(void) override; void resize_data(void) override; diff --git a/source/blender/makesdna/DNA_mesh_types.h b/source/blender/makesdna/DNA_mesh_types.h index c053baf9f7e..94e88bdaca6 100644 --- a/source/blender/makesdna/DNA_mesh_types.h +++ b/source/blender/makesdna/DNA_mesh_types.h @@ -138,6 +138,15 @@ typedef struct Mesh_Runtime { int64_t cd_dirty_loop; int64_t cd_dirty_poly; + /** + * Settings for lazily evaluating the subdivision on the CPU if needed. These are + * set in the modifier when GPU subdivision can be performed. + */ + char subsurf_apply_render; + char subsurf_use_optimal_display; + char _pad[2]; + int subsurf_resolution; + } Mesh_Runtime; typedef struct Mesh { @@ -356,7 +365,8 @@ typedef enum eMeshWrapperType { ME_WRAPPER_TYPE_MDATA = 0, /** Use edit-mesh data (#Mesh.edit_mesh, #Mesh_Runtime.edit_data). */ ME_WRAPPER_TYPE_BMESH = 1, - /* ME_WRAPPER_TYPE_SUBD = 2, */ /* TODO */ + /** Use subdivision mesh data (#Mesh_Runtime.mesh_eval). */ + ME_WRAPPER_TYPE_SUBD = 2, } eMeshWrapperType; /** #Mesh.texflag */ diff --git a/source/blender/makesdna/DNA_modifier_types.h b/source/blender/makesdna/DNA_modifier_types.h index 85cc1361adf..fc041e257b0 100644 --- a/source/blender/makesdna/DNA_modifier_types.h +++ b/source/blender/makesdna/DNA_modifier_types.h @@ -196,6 +196,13 @@ typedef enum { SUBSURF_BOUNDARY_SMOOTH_PRESERVE_CORNERS = 1, } eSubsurfBoundarySmooth; +typedef struct SubsurfRuntimeData { + /* Cached subdivision surface descriptor, with topology and settings. 
*/ + struct Subdiv *subdiv; + char set_by_draw_code; + char _pad[7]; +} SubsurfRuntimeData; + typedef struct SubsurfModifierData { ModifierData modifier; diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index c99651f0717..34415308ef6 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -1145,6 +1145,7 @@ typedef enum eUserpref_GPU_Flag { USER_GPU_FLAG_NO_DEPT_PICK = (1 << 0), USER_GPU_FLAG_NO_EDIT_MODE_SMOOTH_WIRE = (1 << 1), USER_GPU_FLAG_OVERLAY_SMOOTH_WIRE = (1 << 2), + USER_GPU_FLAG_SUBDIVISION_EVALUATION = (1 << 3), } eUserpref_GPU_Flag; /** #UserDef.tablet_api */ diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c index 929cf94615b..71c38311124 100644 --- a/source/blender/makesrna/intern/rna_userdef.c +++ b/source/blender/makesrna/intern/rna_userdef.c @@ -182,6 +182,7 @@ static const EnumPropertyItem rna_enum_userdef_viewport_aa_items[] = { # include "BKE_image.h" # include "BKE_main.h" # include "BKE_mesh_runtime.h" +# include "BKE_object.h" # include "BKE_paint.h" # include "BKE_pbvh.h" # include "BKE_preferences.h" @@ -578,6 +579,20 @@ static PointerRNA rna_UserDef_apps_get(PointerRNA *ptr) return rna_pointer_inherit_refine(ptr, &RNA_PreferencesApps, ptr->data); } +/* Reevaluate objects with a subsurf modifier as the last in their modifiers stacks. 
*/ +static void rna_UserDef_subdivision_update(Main *bmain, Scene *scene, PointerRNA *ptr) +{ + Object *ob; + + for (ob = bmain->objects.first; ob; ob = ob->id.next) { + if (BKE_object_get_last_subsurf_modifier(ob) != NULL) { + DEG_id_tag_update(&ob->id, ID_RECALC_GEOMETRY); + } + } + + rna_userdef_update(bmain, scene, ptr); +} + static void rna_UserDef_audio_update(Main *bmain, Scene *UNUSED(scene), PointerRNA *UNUSED(ptr)) { BKE_sound_init(bmain); @@ -5651,6 +5666,16 @@ static void rna_def_userdef_system(BlenderRNA *brna) "Use the depth buffer for picking 3D View selection " "(without this the front most object may not be selected first)"); + /* GPU subdivision evaluation. */ + + prop = RNA_def_property(srna, "use_gpu_subdivision", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "gpu_flag", USER_GPU_FLAG_SUBDIVISION_EVALUATION); + RNA_def_property_ui_text(prop, + "GPU Subdivision", + "Enable GPU acceleration for evaluating the last subdivision surface " + "modifiers in the stack"); + RNA_def_property_update(prop, 0, "rna_UserDef_subdivision_update"); + /* Audio */ prop = RNA_def_property(srna, "audio_mixing_buffer", PROP_ENUM, PROP_NONE); diff --git a/source/blender/modifiers/intern/MOD_subsurf.c b/source/blender/modifiers/intern/MOD_subsurf.c index 7470f2abb15..00870d076ef 100644 --- a/source/blender/modifiers/intern/MOD_subsurf.c +++ b/source/blender/modifiers/intern/MOD_subsurf.c @@ -39,6 +39,7 @@ #include "DNA_screen_types.h" #include "BKE_context.h" +#include "BKE_editmesh.h" #include "BKE_mesh.h" #include "BKE_scene.h" #include "BKE_screen.h" @@ -46,6 +47,7 @@ #include "BKE_subdiv_ccg.h" #include "BKE_subdiv_deform.h" #include "BKE_subdiv_mesh.h" +#include "BKE_subdiv_modifier.h" #include "BKE_subsurf.h" #include "UI_interface.h" @@ -65,11 +67,6 @@ #include "intern/CCGSubSurf.h" -typedef struct SubsurfRuntimeData { - /* Cached subdivision surface descriptor, with topology and settings. 
*/ - struct Subdiv *subdiv; -} SubsurfRuntimeData; - static void initData(ModifierData *md) { SubsurfModifierData *smd = (SubsurfModifierData *)md; @@ -155,37 +152,6 @@ static int subdiv_levels_for_modifier_get(const SubsurfModifierData *smd, return get_render_subsurf_level(&scene->r, requested_levels, use_render_params); } -static void subdiv_settings_init(SubdivSettings *settings, - const SubsurfModifierData *smd, - const ModifierEvalContext *ctx) -{ - const bool use_render_params = (ctx->flag & MOD_APPLY_RENDER); - const int requested_levels = (use_render_params) ? smd->renderLevels : smd->levels; - - settings->is_simple = (smd->subdivType == SUBSURF_TYPE_SIMPLE); - settings->is_adaptive = !(smd->flags & eSubsurfModifierFlag_UseRecursiveSubdivision); - settings->level = settings->is_simple ? - 1 : - (settings->is_adaptive ? smd->quality : requested_levels); - settings->use_creases = (smd->flags & eSubsurfModifierFlag_UseCrease); - settings->vtx_boundary_interpolation = BKE_subdiv_vtx_boundary_interpolation_from_subsurf( - smd->boundary_smooth); - settings->fvar_linear_interpolation = BKE_subdiv_fvar_interpolation_from_uv_smooth( - smd->uv_smooth); -} - -/* Main goal of this function is to give usable subdivision surface descriptor - * which matches settings and topology. */ -static Subdiv *subdiv_descriptor_ensure(SubsurfModifierData *smd, - const SubdivSettings *subdiv_settings, - const Mesh *mesh) -{ - SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; - Subdiv *subdiv = BKE_subdiv_update_from_mesh(runtime_data->subdiv, subdiv_settings, mesh); - runtime_data->subdiv = subdiv; - return subdiv; -} - /* Subdivide into fully qualified mesh. */ static void subdiv_mesh_settings_init(SubdivToMeshSettings *settings, @@ -240,14 +206,17 @@ static Mesh *subdiv_as_ccg(SubsurfModifierData *smd, return result; } -static SubsurfRuntimeData *subsurf_ensure_runtime(SubsurfModifierData *smd) +/* Cache settings for lazy CPU evaluation. 
*/ + +static void subdiv_cache_cpu_evaluation_settings(const ModifierEvalContext *ctx, + Mesh *me, + SubsurfModifierData *smd) { - SubsurfRuntimeData *runtime_data = (SubsurfRuntimeData *)smd->modifier.runtime; - if (runtime_data == NULL) { - runtime_data = MEM_callocN(sizeof(*runtime_data), "subsurf runtime"); - smd->modifier.runtime = runtime_data; - } - return runtime_data; + SubdivToMeshSettings mesh_settings; + subdiv_mesh_settings_init(&mesh_settings, smd, ctx); + me->runtime.subsurf_apply_render = (ctx->flag & MOD_APPLY_RENDER) != 0; + me->runtime.subsurf_resolution = mesh_settings.resolution; + me->runtime.subsurf_use_optimal_display = mesh_settings.use_optimal_display; } /* Modifier itself. */ @@ -261,12 +230,30 @@ static Mesh *modifyMesh(ModifierData *md, const ModifierEvalContext *ctx, Mesh * #endif SubsurfModifierData *smd = (SubsurfModifierData *)md; SubdivSettings subdiv_settings; - subdiv_settings_init(&subdiv_settings, smd, ctx); + BKE_subsurf_modifier_subdiv_settings_init( + &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0); if (subdiv_settings.level == 0) { return result; } - SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd); - Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh); + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + + /* Delay evaluation to the draw code if possible, provided we do not have to apply the modifier. + */ + if ((ctx->flag & MOD_APPLY_TO_BASE_MESH) == 0) { + Scene *scene = DEG_get_evaluated_scene(ctx->depsgraph); + const bool is_render_mode = (ctx->flag & MOD_APPLY_RENDER) != 0; + /* Same check as in `DRW_mesh_batch_cache_create_requested` to keep both code coherent. 
*/ + const bool is_editmode = (mesh->edit_mesh != NULL) && + (mesh->edit_mesh->mesh_eval_final != NULL); + const int required_mode = BKE_subsurf_modifier_eval_required_mode(is_render_mode, is_editmode); + if (BKE_subsurf_modifier_can_do_gpu_subdiv_ex(scene, ctx->object, smd, required_mode, false)) { + subdiv_cache_cpu_evaluation_settings(ctx, mesh, smd); + return result; + } + } + + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure( + smd, &subdiv_settings, mesh, false); if (subdiv == NULL) { /* Happens on bad topology, but also on empty input mesh. */ return result; @@ -320,12 +307,14 @@ static void deformMatrices(ModifierData *md, SubsurfModifierData *smd = (SubsurfModifierData *)md; SubdivSettings subdiv_settings; - subdiv_settings_init(&subdiv_settings, smd, ctx); + BKE_subsurf_modifier_subdiv_settings_init( + &subdiv_settings, smd, (ctx->flag & MOD_APPLY_RENDER) != 0); if (subdiv_settings.level == 0) { return; } - SubsurfRuntimeData *runtime_data = subsurf_ensure_runtime(smd); - Subdiv *subdiv = subdiv_descriptor_ensure(smd, &subdiv_settings, mesh); + SubsurfRuntimeData *runtime_data = BKE_subsurf_modifier_ensure_runtime(smd); + Subdiv *subdiv = BKE_subsurf_modifier_subdiv_descriptor_ensure( + smd, &subdiv_settings, mesh, false); if (subdiv == NULL) { /* Happens on bad topology, but also on empty input mesh. */ return; diff --git a/source/blender/windowmanager/intern/wm_init_exit.c b/source/blender/windowmanager/intern/wm_init_exit.c index 2f87e5789fe..957ec7d800d 100644 --- a/source/blender/windowmanager/intern/wm_init_exit.c +++ b/source/blender/windowmanager/intern/wm_init_exit.c @@ -562,6 +562,13 @@ void WM_exit_ex(bContext *C, const bool do_python) BKE_blender_free(); /* blender.c, does entire library and spacetypes */ // BKE_material_copybuf_free(); + + /* Free the GPU subdivision data after the database to ensure that subdivision structs used by + * the modifiers were garbage collected. 
*/ + if (opengl_is_init) { + DRW_subdiv_free(); + } + ANIM_fcurves_copybuf_free(); ANIM_drivers_copybuf_free(); ANIM_driver_vars_copybuf_free(); |