Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKévin Dietrich <kevin.dietrich@mailoo.org>2021-12-27 18:34:47 +0300
committerKévin Dietrich <kevin.dietrich@mailoo.org>2021-12-27 18:35:54 +0300
commiteed45d2a239a2a18a2420ba15dfb55e0f8dc5630 (patch)
treeaa55ce966caa8e28db4853d7d755003ed249805b /source/blender/draw/intern/shaders
parent31e120ef4997583332aa9b5af93521e7e666e9f3 (diff)
OpenSubDiv: add support for an OpenGL evaluator
This evaluator is used in order to evaluate subdivision at render time, allowing for faster renders of meshes with a subdivision surface modifier placed at the last position in the modifier list. When evaluating the subsurf modifier, we detect whether we can delegate evaluation to the draw code. If so, the subdivision is first evaluated on the GPU using our own custom evaluator (only the coarse data needs to be initially sent to the GPU), then, buffers for the final `MeshBufferCache` are filled on the GPU using a set of compute shaders. However, some buffers are still filled on the CPU side, if doing so on the GPU is impractical (e.g. the line adjacency buffer used for x-ray, whose logic is hardly GPU compatible). This is done at the mesh buffer extraction level so that the result can be readily used in the various OpenGL engines, without having to write custom geometry or tesselation shaders. We use our own subdivision evaluation shaders, instead of OpenSubDiv's vanilla one, in order to control the data layout, and interpolation. For example, we store vertex colors as compressed 16-bit integers, while OpenSubDiv's default evaluator only work for float types. In order to still access the modified geometry on the CPU side, for use in modifiers or transform operators, a dedicated wrapper type is added `MESH_WRAPPER_TYPE_SUBD`. Subdivision will be lazily evaluated via `BKE_object_get_evaluated_mesh` which will create such a wrapper if possible. If the final subdivision surface is not needed on the CPU side, `BKE_object_get_evaluated_mesh_no_subsurf` should be used. Enabling or disabling GPU subdivision can be done through the user preferences (under Viewport -> Subdivision). See patch description for benchmarks. Reviewed By: campbellbarton, jbakker, fclem, brecht, #eevee_viewport Differential Revision: https://developer.blender.org/D12406
Diffstat (limited to 'source/blender/draw/intern/shaders')
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl230
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl57
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl43
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_lib.glsl176
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl56
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl34
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl416
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl97
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl80
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl31
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl52
-rw-r--r--source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl47
12 files changed, 1319 insertions, 0 deletions
diff --git a/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
new file mode 100644
index 00000000000..36c3970d9a0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_custom_data_interp_comp.glsl
@@ -0,0 +1,230 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 1) readonly restrict buffer sourceBuffer /* Source data interpolated by main(). */
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint src_data[]; /* read_vertex() consumes two uints per element, each packing two 16-bit channels. */
+#else
+ float src_data[]; /* read_vertex() consumes DIMENSIONS consecutive floats per element. */
+#endif
+};
+
+layout(std430, binding = 2) readonly restrict buffer facePTexOffset
+{
+ uint face_ptex_offset[]; /* Indexed by coarse polygon; get_vertex_count() subtracts consecutive entries. */
+};
+
+layout(std430, binding = 3) readonly restrict buffer patchCoords
+{
+ BlenderPatchCoord patch_coords[]; /* Indexed by subdivided loop index in main(). */
+};
+
+layout(std430, binding = 4) readonly restrict buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[]; /* Indexed by coarse polygon; masked with coarse_face_loopstart_mask. */
+};
+
+layout(std430, binding = 5) writeonly restrict buffer destBuffer /* Interpolated result. */
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+ uint dst_data[]; /* write_vertex() stores two uints per element, starting at dst_offset. */
+#else
+ float dst_data[]; /* write_vertex() stores DIMENSIONS floats per element, starting at dst_offset. */
+#endif
+};
+
+struct Vertex { /* One attribute value, held as DIMENSIONS floats while interpolating. */
+ float vertex_data[DIMENSIONS];
+};
+
+/* Reset every component of `v` to zero. */
+void clear(inout Vertex v)
+{
+  int component = 0;
+  while (component < DIMENSIONS) {
+    v.vertex_data[component] = 0.0;
+    component++;
+  }
+}
+
+/* Load the source element at `index` and return it as floats.
+ *
+ * With GPU_FETCH_U16_TO_FLOAT an element is two uints, each packing two 16-bit channels (high
+ * half first) that are rescaled by 1/65535. Only DIMENSIONS == 4 is handled; any other value
+ * yields a zeroed Vertex. Without it, an element is DIMENSIONS consecutive floats. */
+Vertex read_vertex(uint index)
+{
+  Vertex vert;
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  uint first_word = index * 2;
+  if (DIMENSIONS == 4) {
+    uint packed_xy = src_data[first_word];
+    uint packed_zw = src_data[first_word + 1];
+
+    vert.vertex_data[0] = float((packed_xy >> 16) & 0xffff) / 65535.0;
+    vert.vertex_data[1] = float(packed_xy & 0xffff) / 65535.0;
+    vert.vertex_data[2] = float((packed_zw >> 16) & 0xffff) / 65535.0;
+    vert.vertex_data[3] = float(packed_zw & 0xffff) / 65535.0;
+  }
+  else {
+    /* This case is unsupported for now. */
+    clear(vert);
+  }
+#else
+  uint first_float = index * DIMENSIONS;
+  for (int c = 0; c < DIMENSIONS; c++) {
+    vert.vertex_data[c] = src_data[first_float + c];
+  }
+#endif
+  return vert;
+}
+
+/* Store `v` as destination element `index`, offset by `dst_offset`.
+ *
+ * With GPU_FETCH_U16_TO_FLOAT the four components are scaled by 65535, truncated and packed two
+ * per uint (high half first). Only DIMENSIONS == 4 is handled; otherwise a single zero is
+ * written. Without it, the DIMENSIONS floats are written as-is. */
+void write_vertex(uint index, Vertex v)
+{
+#ifdef GPU_FETCH_U16_TO_FLOAT
+  uint first_word = dst_offset + index * 2;
+  if (DIMENSIONS == 4) {
+    uint qx = uint(v.vertex_data[0] * 65535.0);
+    uint qy = uint(v.vertex_data[1] * 65535.0);
+    uint qz = uint(v.vertex_data[2] * 65535.0);
+    uint qw = uint(v.vertex_data[3] * 65535.0);
+
+    dst_data[first_word] = qx << 16 | qy;
+    dst_data[first_word + 1] = qz << 16 | qw;
+  }
+  else {
+    /* This case is unsupported for now. */
+    dst_data[first_word] = 0;
+  }
+#else
+  uint first_float = dst_offset + index * DIMENSIONS;
+  for (int c = 0; c < DIMENSIONS; c++) {
+    dst_data[first_float + c] = v.vertex_data[c];
+  }
+#endif
+}
+
+/* Bilinear blend of four values: `uv.x` blends v0 -> v1 and v2 -> v3, then `uv.y` blends those
+ * two intermediate results. */
+Vertex interp_vertex(Vertex v0, Vertex v1, Vertex v2, Vertex v3, vec2 uv)
+{
+  Vertex blended;
+  for (int c = 0; c < DIMENSIONS; c++) {
+    float along_v0_v1 = mix(v0.vertex_data[c], v1.vertex_data[c], uv.x);
+    float along_v2_v3 = mix(v2.vertex_data[c], v3.vertex_data[c], uv.x);
+    blended.vertex_data[c] = mix(along_v0_v1, along_v2_v3, uv.y);
+  }
+  return blended;
+}
+
+/* Accumulate `v1 * weight` into `v0`, component by component. */
+void add_with_weight(inout Vertex v0, Vertex v1, float weight)
+{
+  for (int c = 0; c < DIMENSIONS; c++) {
+    float contribution = v1.vertex_data[c] * weight;
+    v0.vertex_data[c] += contribution;
+  }
+}
+
+/* Return the component-wise midpoint of `v0` and `v1`. */
+Vertex average(Vertex v0, Vertex v1)
+{
+  Vertex midpoint;
+  for (int c = 0; c < DIMENSIONS; c++) {
+    float sum = v0.vertex_data[c] + v1.vertex_data[c];
+    midpoint.vertex_data[c] = sum * 0.5;
+  }
+  return midpoint;
+}
+
+/* Number of vertices of a coarse polygon, deduced from its patch count: a polygon with a single
+ * patch is a quad, otherwise it has one patch per vertex. */
+uint get_vertex_count(uint coarse_polygon)
+{
+  uint first_patch = face_ptex_offset[coarse_polygon];
+  uint end_patch = face_ptex_offset[coarse_polygon + 1];
+  uint patch_count = end_patch - first_patch;
+  return (patch_count == 1) ? 4u : patch_count;
+}
+
+/* Convert a global patch index into an index relative to the first patch of `coarse_polygon`. */
+uint get_polygon_corner_index(uint coarse_polygon, uint patch_index)
+{
+  return patch_index - face_ptex_offset[coarse_polygon];
+}
+
+/* Extract the loop-start bits of the extra coarse face data of `coarse_polygon`. */
+uint get_loop_start(uint coarse_polygon)
+{
+  uint face_data = extra_coarse_face_data[coarse_polygon];
+  return face_data & coarse_face_loopstart_mask;
+}
+
+/* Interpolate the source data for the four loops of one subdivided quad.
+ *
+ * The quad is treated as a bilinear patch whose corner values (v0..v3) come from the coarse
+ * polygon it was generated from:
+ * - a coarse quad uses its four corner values directly;
+ * - any other polygon has one patch per corner, spanning that corner (v0), the midpoints of the
+ *   edges towards the next (v1) and previous (v3) corners, and the polygon center (v2). */
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint start_loop_index = quad_index * 4;
+
+  /* Find which coarse polygon we came from. */
+  uint coarse_polygon = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count);
+  uint loop_start = get_loop_start(coarse_polygon);
+
+  /* Corner values of the bilinear patch this quad belongs to. */
+  Vertex v0, v1, v2, v3;
+  clear(v0);
+  clear(v1);
+  clear(v2);
+  clear(v3);
+
+  /* Find the number of vertices for the coarse polygon. */
+  uint number_of_vertices = get_vertex_count(coarse_polygon);
+  if (number_of_vertices == 4) {
+    /* Interpolate the src data. */
+    v0 = read_vertex(loop_start + 0);
+    v1 = read_vertex(loop_start + 1);
+    v2 = read_vertex(loop_start + 2);
+    v3 = read_vertex(loop_start + 3);
+  }
+  else {
+    /* Interpolate the src data for the center: the mean of ALL the corners. `loop_end` is
+     * exclusive, so every one of the `number_of_vertices` corners contributes and the weights
+     * sum to one. */
+    uint loop_end = loop_start + number_of_vertices;
+    Vertex center_value;
+    clear(center_value);
+
+    float weight = 1.0 / float(number_of_vertices);
+
+    for (uint l = loop_start; l < loop_end; l++) {
+      add_with_weight(center_value, read_vertex(l), weight);
+    }
+
+    /* Interpolate between the previous and next corner for the middle values for the edges. */
+    uint patch_index = uint(patch_coords[start_loop_index].patch_index);
+    uint current_coarse_corner = get_polygon_corner_index(coarse_polygon, patch_index);
+    uint next_coarse_corner = (current_coarse_corner + 1) % number_of_vertices;
+    uint prev_coarse_corner = (current_coarse_corner + number_of_vertices - 1) %
+                              number_of_vertices;
+
+    /* The patch is anchored on its own corner, not on the first corner of the polygon. */
+    v0 = read_vertex(loop_start + current_coarse_corner);
+    v1 = average(v0, read_vertex(loop_start + next_coarse_corner));
+    v3 = average(v0, read_vertex(loop_start + prev_coarse_corner));
+
+    /* Interpolate between the current value, and the ones for the center and mid-edges. */
+    v2 = center_value;
+  }
+
+  /* Do a linear interpolation of the data based on the UVs for each loop of this subdivided quad.
+   */
+  for (uint loop_index = start_loop_index; loop_index < start_loop_index + 4; loop_index++) {
+    BlenderPatchCoord co = patch_coords[loop_index];
+    vec2 uv = decode_uv(co.encoded_uv);
+    /* NOTE: v2 and v3 are reversed to stay consistent with the interpolation weight on the x-axis:
+     *
+     * v3 +-----+ v2
+     *    |     |
+     *    |     |
+     * v0 +-----+ v1
+     *
+     * otherwise, weight would be `1.0 - uv.x` for `v2 <-> v3`, but `uv.x` for `v0 <-> v1`.
+     */
+    Vertex result = interp_vertex(v0, v1, v3, v2, uv);
+    write_vertex(loop_index, result);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
new file mode 100644
index 00000000000..f11c0f6427e
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl
@@ -0,0 +1,57 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputEdgeOrigIndex
+{
+ int input_origindex[]; /* Indexed by subdivided loop; compared against ORIGINDEX_NONE in emit_line(). */
+};
+
+layout(std430, binding = 1) writeonly buffer outputLinesIndices
+{
+ uint output_lines[]; /* Two vertex indices per line. */
+};
+
+#ifndef LINES_LOOSE
+/* Write the two indices of the line leaving corner `corner_index` of the quad starting at
+ * `start_loop_index` into `output_lines[line_offset .. line_offset + 1]`.
+ * When `optimal_display` is set and the corner has no original index, both indices are written
+ * as 0xffffffff instead. */
+void emit_line(uint line_offset, uint start_loop_index, uint corner_index)
+{
+  uint first_vert = start_loop_index + corner_index;
+  bool skip_line = input_origindex[first_vert] == ORIGINDEX_NONE && optimal_display;
+
+  if (!skip_line) {
+    /* Mod 4 so we loop back at the first vertex on the last loop index (3). */
+    uint second_vert = start_loop_index + (corner_index + 1) % 4;
+
+    output_lines[line_offset + 0] = first_vert;
+    output_lines[line_offset + 1] = second_vert;
+    return;
+  }
+
+  output_lines[line_offset + 0] = 0xffffffff;
+  output_lines[line_offset + 1] = 0xffffffff;
+}
+#endif
+
+/* Fill the line index buffer: one loose line per invocation with LINES_LOOSE, otherwise the four
+ * border lines of one subdivided quad per invocation. */
+void main()
+{
+  uint index = get_global_invocation_index();
+  if (index >= total_dispatch_size) {
+    return;
+  }
+
+#ifdef LINES_LOOSE
+  /* In the loose lines case, we execute for each line, with two vertices per line. */
+  uint first_vert = num_subdiv_loops + index * 2;
+  uint dst = edge_loose_offset + index * 2;
+  output_lines[dst] = first_vert;
+  output_lines[dst + 1] = first_vert + 1;
+#else
+  /* We execute for each quad: 4 loops and 4 lines (8 indices) per quad. */
+  uint quad_first_loop = index * 4;
+  uint quad_first_line = index * 8;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    emit_line(quad_first_line + corner * 2, quad_first_loop, corner);
+  }
+#endif
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
new file mode 100644
index 00000000000..3257ebdae17
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl
@@ -0,0 +1,43 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Generate triangles from subdivision quads indices. */
+
+layout(std430, binding = 1) writeonly buffer outputTriangles
+{
+ uint output_tris[]; /* Six loop indices (two triangles) per subdivided quad. */
+};
+
+#ifndef SINGLE_MATERIAL
+layout(std430, binding = 2) readonly buffer inputPolygonMatOffset
+{
+ int polygon_mat_offset[]; /* Indexed by coarse polygon; added to the quad index to place its triangles. */
+};
+#endif
+
+/* Emit the two triangles (0, 1, 2) and (0, 2, 3) of one subdivided quad.
+ * Without SINGLE_MATERIAL, the destination slot of the quad is shifted by the material offset of
+ * its coarse polygon. */
+void main()
+{
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+
+#ifdef SINGLE_MATERIAL
+  uint tri_base = quad_index * 6;
+#else
+  uint coarse_poly = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count);
+  int tri_base = (int(quad_index) + polygon_mat_offset[coarse_poly]) * 6;
+#endif
+
+  /* First triangle. */
+  output_tris[tri_base + 0] = first_loop + 0;
+  output_tris[tri_base + 1] = first_loop + 1;
+  output_tris[tri_base + 2] = first_loop + 2;
+  /* Second triangle. */
+  output_tris[tri_base + 3] = first_loop + 0;
+  output_tris[tri_base + 4] = first_loop + 2;
+  output_tris[tri_base + 5] = first_loop + 3;
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
new file mode 100644
index 00000000000..005561964b8
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl
@@ -0,0 +1,176 @@
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; /* 64 invocations per work group, along X. */
+
+/* Uniform block for #DRWSubivUboStorage. */
+layout(std140) uniform shader_data
+{
+ /* Offsets in the buffers data where the source and destination data start. */
+ int src_offset;
+ int dst_offset;
+
+ /* Parameters for the DRWPatchMap. */
+ int min_patch_face; /* find_patch() rejects face indices below this value. */
+ int max_patch_face; /* find_patch() rejects face indices above this value. */
+ int max_depth; /* Last quadtree depth visited by find_patch(). */
+ int patches_are_triangular; /* Non-zero selects the triangular quadrant lookup in find_patch(). */
+
+ /* Coarse topology information. */
+ int coarse_poly_count;
+ uint edge_loose_offset; /* Where the loose lines start in the line index buffer (LINES_LOOSE). */
+
+ /* Subdiv topology information. */
+ uint num_subdiv_loops; /* Loose line vertices are indexed after this many loops (LINES_LOOSE). */
+
+ /* Subdivision settings. */
+ bool optimal_display; /* When set, lines whose origindex is ORIGINDEX_NONE are emitted as 0xffffffff. */
+
+ /* Sculpt data. */
+ bool has_sculpt_mask;
+
+ /* Masks for the extra coarse face data. */
+ uint coarse_face_select_mask;
+ uint coarse_face_smooth_mask;
+ uint coarse_face_active_mask;
+ uint coarse_face_loopstart_mask;
+
+ /* Total number of elements to process. */
+ uint total_dispatch_size; /* Invocations whose index is >= this value return immediately. */
+};
+
+/* Flatten the (x, y) global invocation ID into a single linear index, with one row being all the
+ * invocations dispatched along X. */
+uint get_global_invocation_index()
+{
+  uint row_width = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
+  uint column = gl_GlobalInvocationID.x;
+  uint row = gl_GlobalInvocationID.y;
+  return column + row * row_width;
+}
+
+/* Structure for #CompressedPatchCoord. */
+struct BlenderPatchCoord {
+ int patch_index; /* Passed as the face index to the patch lookup. */
+ uint encoded_uv; /* Two 16-bit normalized coordinates, unpacked by decode_uv(). */
+};
+
+/* Unpack two 16-bit normalized values from `encoded_uv`: u is the high half, v the low half. */
+vec2 decode_uv(uint encoded_uv)
+{
+  uint high_half = (encoded_uv >> 16) & 0xFFFFu;
+  uint low_half = encoded_uv & 0xFFFFu;
+  return vec2(float(high_half) / 65535.0, float(low_half) / 65535.0);
+}
+
+/* This structure is a carbon copy of OpenSubDiv's PatchTable::PatchHandle. */
+struct PatchHandle { /* Element type of the patch handle buffer; returned by find_patch(). */
+ int array_index;
+ int patch_index;
+ int vertex_index;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchCoord. */
+struct PatchCoord {
+ int array_index;
+ int patch_index;
+ int vertex_index;
+ float u;
+ float v;
+};
+
+/* This structure is a carbon copy of OpenSubDiv's PatchCoord.QuadNode.
+ * Each child is a bitfield. */
+struct QuadNode {
+ uvec4 child; /* One bitfield per quadrant, decoded with is_set(), is_leaf() and get_index(). */
+};
+
+/* True when the `isSet` bit (bit 0) of a QuadNode child bitfield is raised. */
+bool is_set(uint i)
+{
+  uint set_bit = i & 0x1u;
+  return set_bit != 0;
+}
+
+/* True when the `isLeaf` bit (bit 1) of a QuadNode child bitfield is raised. */
+bool is_leaf(uint i)
+{
+  uint leaf_bit = i & 0x2u;
+  return leaf_bit != 0;
+}
+
+/* Extract the `index` field of a QuadNode child bitfield: everything above the two flag bits. */
+uint get_index(uint i)
+{
+  uint without_flags = i >> 2;
+  return without_flags & 0x3FFFFFFFu;
+}
+
+/* Duplicate of #PosNorLoop from the mesh extract CPU code.
+ * We do not use a vec3 for the position as it will be padded to a vec4 which is incompatible with
+ * the format. */
+struct PosNorLoop {
+ float x, y, z; /* Position, accessed through get_vertex_pos() / set_vertex_pos(). */
+ /* TODO(kevindietrich) : figure how to compress properly as GLSL does not have char/short types,
+ * bit operations get tricky. */
+ float nx, ny, nz; /* Normal, accessed through get_vertex_nor() / set_vertex_nor(). */
+ float flag; /* Written as float(flag) by the three-argument set_vertex_nor(). */
+};
+
+/* Gather the three position floats of `vertex_data` into a vec3. */
+vec3 get_vertex_pos(PosNorLoop vertex_data)
+{
+  vec3 pos;
+  pos.x = vertex_data.x;
+  pos.y = vertex_data.y;
+  pos.z = vertex_data.z;
+  return pos;
+}
+
+/* Gather the three normal floats of `vertex_data` into a vec3. */
+vec3 get_vertex_nor(PosNorLoop vertex_data)
+{
+  vec3 nor;
+  nor.x = vertex_data.nx;
+  nor.y = vertex_data.ny;
+  nor.z = vertex_data.nz;
+  return nor;
+}
+
+/* Store `pos` in the position floats of `vertex_data`; the other members are left untouched. */
+void set_vertex_pos(inout PosNorLoop vertex_data, vec3 pos)
+{
+  vertex_data.z = pos.z;
+  vertex_data.y = pos.y;
+  vertex_data.x = pos.x;
+}
+
+/* Store `nor` in the normal floats of `vertex_data` and `flag`, converted to float, in its flag
+ * member. */
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor, uint flag)
+{
+  vertex_data.flag = float(flag);
+  vertex_data.nx = nor.x;
+  vertex_data.ny = nor.y;
+  vertex_data.nz = nor.z;
+}
+
+/* Set the vertex normal but preserve the existing flag. This is for when we compute manually the
+ * vertex normals when we cannot use the limit surface, in which case the flag and the normal are
+ * set by two separate compute pass. */
+void set_vertex_nor(inout PosNorLoop vertex_data, vec3 nor)
+{
+  /* Only the normal components are written. Forwarding to the three-argument overload would
+   * overwrite `vertex_data.flag` and lose the value written by the other pass. */
+  vertex_data.nx = nor.x;
+  vertex_data.ny = nor.y;
+  vertex_data.nz = nor.z;
+}
+
+#define ORIGINDEX_NONE -1 /* Origindex value of an element that has no original counterpart. */
+
+#ifdef SUBDIV_POLYGON_OFFSET
+layout(std430, binding = 0) readonly buffer inputSubdivPolygonOffset
+{
+ uint subdiv_polygon_offset[]; /* Indexed by coarse polygon; searched by subdivided quad index. */
+};
+
+/* Given the index of the subdivision quad, return the index of the corresponding coarse polygon.
+ * This uses subdiv_polygon_offset and since it is a growing list of offsets, we can use binary
+ * search to locate the right index.
+ *
+ * The search finds the first polygon whose offset is not below `subdiv_quad_index`. If that
+ * offset is an exact match the polygon is returned, otherwise the quad belongs to the previous
+ * polygon. */
+uint coarse_polygon_index_from_subdiv_quad_index(uint subdiv_quad_index, uint coarse_poly_count)
+{
+  uint first = 0;
+  uint last = coarse_poly_count;
+
+  while (first != last) {
+    uint middle = (first + last) / 2;
+
+    if (subdiv_polygon_offset[middle] < subdiv_quad_index) {
+      first = middle + 1;
+    }
+    else {
+      last = middle;
+    }
+  }
+
+  /* `first` equals `coarse_poly_count` when the quad lies past the offset of the last polygon:
+   * do not read the offsets in that case, the index would be outside of the searched range. */
+  if (first < coarse_poly_count && subdiv_polygon_offset[first] == subdiv_quad_index) {
+    return first;
+  }
+
+  return first - 1;
+}
+#endif
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
new file mode 100644
index 00000000000..575090472b1
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_accumulate_comp.glsl
@@ -0,0 +1,56 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[]; /* Indexed by subdivided loop; only the positions are read here. */
+};
+
+layout(std430, binding = 1) readonly buffer faceAdjacencyOffsets
+{
+ uint face_adjacency_offsets[]; /* Indexed by vertex; entry [v + 1] minus entry [v] is the adjacent face count. */
+};
+
+layout(std430, binding = 2) readonly buffer faceAdjacencyLists
+{
+ uint face_adjacency_lists[]; /* Subdivided quad indices, grouped per vertex. */
+};
+
+layout(std430, binding = 3) writeonly buffer vertexNormals
+{
+ vec3 normals[]; /* NOTE(review): std430 gives vec3 arrays a 16 byte stride; confirm the CPU side allocation matches. */
+};
+
+/* Compute the normal of one subdivided vertex as the normalized sum of the unit normals of its
+ * adjacent subdivided quads. */
+void main()
+{
+  uint vertex_index = get_global_invocation_index();
+  if (vertex_index >= total_dispatch_size) {
+    return;
+  }
+
+  /* The adjacent faces of this vertex are stored contiguously in the adjacency lists. */
+  uint first_adjacent_face_offset = face_adjacency_offsets[vertex_index];
+  uint number_of_adjacent_faces = face_adjacency_offsets[vertex_index + 1] -
+                                  first_adjacent_face_offset;
+
+  vec3 accumulated_normal = vec3(0.0);
+
+  /* For each adjacent face. */
+  for (uint i = 0; i < number_of_adjacent_faces; i++) {
+    uint adjacent_face = face_adjacency_lists[first_adjacent_face_offset + i];
+    uint start_loop_index = adjacent_face * 4;
+
+    /* Compute face normal, from the first three loops of the quad. */
+    vec3 adjacent_verts[3];
+    for (uint j = 0; j < 3; j++) {
+      adjacent_verts[j] = get_vertex_pos(pos_nor[start_loop_index + j]);
+    }
+
+    vec3 face_normal = normalize(
+        cross(adjacent_verts[1] - adjacent_verts[0], adjacent_verts[2] - adjacent_verts[0]));
+    accumulated_normal += face_normal;
+  }
+
+  /* Normalizing makes a per-face averaging weight redundant, none is computed. */
+  normals[vertex_index] = normalize(accumulated_normal);
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
new file mode 100644
index 00000000000..84cd65d4161
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_normals_finalize_comp.glsl
@@ -0,0 +1,34 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+layout(std430, binding = 0) readonly buffer inputNormals
+{
+ vec3 vertex_normals[]; /* Indexed by subdivided vertex. NOTE(review): std430 vec3 arrays have a 16 byte stride. */
+};
+
+layout(std430, binding = 1) readonly buffer inputSubdivVertLoopMap
+{
+ uint vert_loop_map[]; /* Indexed by subdivided loop; gives the subdivided vertex of that loop. */
+};
+
+layout(std430, binding = 2) buffer outputPosNor /* Read and written: passed as `inout` to set_vertex_nor(). */
+{
+ PosNorLoop pos_nor[];
+};
+
+/* For each of the four loops of a subdivided quad, look up the normal of the vertex the loop
+ * maps to and store it in the loop's PosNorLoop through set_vertex_nor(). */
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    uint loop_index = first_loop + corner;
+    vec3 nor = vertex_normals[vert_loop_map[loop_index]];
+    set_vertex_nor(pos_nor[loop_index], nor);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
new file mode 100644
index 00000000000..5dd7decf663
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl
@@ -0,0 +1,416 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Source buffer. */
+layout(std430, binding = 0) buffer src_buffer
+{
+ float srcVertexBuffer[]; /* Read as packed vec2 or vec3 elements by read_vec2() / read_vec3(). */
+};
+
+/* #DRWPatchMap */
+layout(std430, binding = 1) readonly buffer inputPatchHandles
+{
+ PatchHandle input_patch_handles[]; /* Indexed by the index stored in a leaf QuadNode child. */
+};
+
+layout(std430, binding = 2) readonly buffer inputQuadNodes
+{
+ QuadNode quad_nodes[]; /* Root nodes are indexed by `face_index - min_patch_face` in find_patch(). */
+};
+
+layout(std430, binding = 3) readonly buffer inputPatchCoords
+{
+ BlenderPatchCoord patch_coords[]; /* Patch index and encoded UV to evaluate, one entry per output element. */
+};
+
+layout(std430, binding = 4) readonly buffer inputVertOrigIndices
+{
+ int input_vert_origindex[]; /* Indexed by subdivided loop; -1 is tested for in the default main(). */
+};
+
+/* Patch buffers. */
+layout(std430, binding = 5) buffer patchArray_buffer
+{
+ OsdPatchArray patchArrayBuffer[]; /* OsdPatchArray is declared outside of this file. */
+};
+
+layout(std430, binding = 6) buffer patchIndex_buffer
+{
+ int patchIndexBuffer[]; /* Control vertex indices of the patches. */
+};
+
+layout(std430, binding = 7) buffer patchParam_buffer
+{
+ OsdPatchParam patchParamBuffer[]; /* OsdPatchParam is declared outside of this file. */
+};
+
+ /* Output buffer(s). Binding 8 holds a different type depending on the evaluation mode. */
+
+#if defined(FVAR_EVALUATION)
+layout(std430, binding = 8) writeonly buffer outputFVarData
+{
+ vec2 output_fvar[]; /* Written at `dst_offset + loop_index`. */
+};
+#elif defined(FDOTS_EVALUATION)
+/* For face dots, we build the position, normals, and index buffers in one go. */
+
+/* vec3 is padded to vec4, but the format used for fdots does not have any padding. */
+struct FDotVert {
+ float x, y, z;
+};
+
+/* Same here, do not use vec3. */
+struct FDotNor {
+ float x, y, z;
+ float flag; /* Value returned by get_face_flag(): -1.0, 1.0 or 0.0. */
+};
+
+layout(std430, binding = 8) writeonly buffer outputVertices
+{
+ FDotVert output_verts[]; /* One entry per coarse face. */
+};
+
+layout(std430, binding = 9) writeonly buffer outputNormals
+{
+ FDotNor output_nors[]; /* One entry per coarse face. */
+};
+
+layout(std430, binding = 10) writeonly buffer outputFdotsIndices
+{
+ uint output_indices[]; /* Each entry is written with its own index. */
+};
+
+layout(std430, binding = 11) readonly buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[]; /* Per coarse face bitfield, tested against the select and active masks. */
+};
+#else
+layout(std430, binding = 8) writeonly buffer outputVertexData
+{
+ PosNorLoop output_verts[]; /* One entry per subdivided loop. */
+};
+#endif
+
+/* Read element `index` of the source buffer, viewed as an array of tightly packed vec2. */
+vec2 read_vec2(int index)
+{
+  int first_float = index * 2;
+  return vec2(srcVertexBuffer[first_float], srcVertexBuffer[first_float + 1]);
+}
+
+/* Read element `index` of the source buffer, viewed as an array of tightly packed vec3. */
+vec3 read_vec3(int index)
+{
+  int first_float = index * 3;
+  return vec3(srcVertexBuffer[first_float],
+              srcVertexBuffer[first_float + 1],
+              srcVertexBuffer[first_float + 2]);
+}
+
+/* Fetch the patch array descriptor stored at `arrayIndex`. */
+OsdPatchArray GetPatchArray(int arrayIndex)
+{
+  OsdPatchArray patch_array = patchArrayBuffer[arrayIndex];
+  return patch_array;
+}
+
+/* Fetch the patch parameters stored at `patchIndex`. */
+OsdPatchParam GetPatchParam(int patchIndex)
+{
+  OsdPatchParam patch_param = patchParamBuffer[patchIndex];
+  return patch_param;
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch Coordinate lookup. Return an OsdPatchCoord for the given patch_index and uvs.
+ * This code is a port of the OpenSubdiv PatchMap lookup code.
+ */
+
+/* Handle used to signal a failed lookup: every index is -1. */
+PatchHandle bogus_patch_handle()
+{
+  PatchHandle invalid;
+  invalid.array_index = -1;
+  invalid.patch_index = -1;
+  invalid.vertex_index = -1;
+  return invalid;
+}
+
+/* Return the quadrant (bit 0: u half, bit 1: v half) containing (u, v) for the given `median`,
+ * and make each coordinate that lies in the upper half relative to that half. */
+int transformUVToQuadQuadrant(float median, inout float u, inout float v)
+{
+  int quadrant = 0;
+
+  if (u >= median) {
+    u -= median;
+    quadrant |= 1;
+  }
+
+  if (v >= median) {
+    v -= median;
+    quadrant |= 2;
+  }
+
+  return quadrant;
+}
+
+int transformUVToTriQuadrant(float median, inout float u, inout float v, inout bool rotated)
+{
+ /* Triangular counterpart of transformUVToQuadQuadrant(): returns the child index (0 to 3)
+ * containing (u, v) and updates `u`, `v` and `rotated` in place, so that find_patch() can call
+ * it again at the next depth with a halved `median`. Child 3 toggles `rotated`. */
+ if (!rotated) {
+ if (u >= median) {
+ u -= median;
+ return 1;
+ }
+ if (v >= median) {
+ v -= median;
+ return 2;
+ }
+ if ((u + v) >= median) {
+ rotated = true; /* Coordinates are left untouched, only the orientation changes. */
+ return 3;
+ }
+ return 0;
+ }
+ else {
+ if (u < median) {
+ v -= median;
+ return 1;
+ }
+ if (v < median) {
+ u -= median;
+ return 2;
+ }
+ u -= median; /* From here on both coordinates are relative to the median. */
+ v -= median;
+ if ((u + v) < median) {
+ rotated = false;
+ return 3;
+ }
+ return 0;
+ }
+}
+
+/* Walk the quadtree of `face_index` down to the leaf containing (u, v) and return the patch
+ * handle stored there.
+ *
+ * A bogus handle (all indices -1) is returned when the face is outside of
+ * [min_patch_face, max_patch_face], when its root node is not set, or when no leaf is reached
+ * within `max_depth` levels. */
+PatchHandle find_patch(int face_index, float u, float v)
+{
+  if (face_index < min_patch_face || face_index > max_patch_face) {
+    return bogus_patch_handle();
+  }
+
+  QuadNode node = quad_nodes[face_index - min_patch_face];
+
+  if (!is_set(node.child[0])) {
+    return bogus_patch_handle();
+  }
+
+  float median = 0.5;
+  bool tri_rotated = false;
+
+  /* Each level halves the median, as (u, v) are made relative to the selected quadrant. */
+  for (int depth = 0; depth <= max_depth; ++depth, median *= 0.5) {
+    int quadrant = (patches_are_triangular != 0) ?
+                       transformUVToTriQuadrant(median, u, v, tri_rotated) :
+                       transformUVToQuadQuadrant(median, u, v);
+
+    if (is_leaf(node.child[quadrant])) {
+      return input_patch_handles[get_index(node.child[quadrant])];
+    }
+
+    node = quad_nodes[get_index(node.child[quadrant])];
+  }
+
+  /* No leaf was found: return a well defined value instead of falling off the end of a non-void
+   * function, which leaves the result undefined. */
+  return bogus_patch_handle();
+}
+
+/* Fallback coordinate used when the patch lookup fails: array and vertex indices are zero, the
+ * patch index is the face index, and the parametric location is passed through. */
+OsdPatchCoord bogus_patch_coord(int face_index, float u, float v)
+{
+  OsdPatchCoord fallback;
+  fallback.s = u;
+  fallback.t = v;
+  fallback.patchIndex = face_index;
+  fallback.arrayIndex = 0;
+  fallback.vertIndex = 0;
+  return fallback;
+}
+
+/* Build the OsdPatchCoord for (face_index, u, v): the indices come from the handle found by
+ * find_patch(), or from bogus_patch_coord() when the lookup fails. */
+OsdPatchCoord GetPatchCoord(int face_index, float u, float v)
+{
+  PatchHandle handle = find_patch(face_index, u, v);
+  bool lookup_failed = (handle.array_index == -1);
+
+  if (lookup_failed) {
+    return bogus_patch_coord(face_index, u, v);
+  }
+
+  OsdPatchCoord found;
+  found.s = u;
+  found.t = v;
+  found.arrayIndex = handle.array_index;
+  found.patchIndex = handle.patch_index;
+  found.vertIndex = handle.vertex_index;
+  return found;
+}
+
+/* ------------------------------------------------------------------------------
+ * Patch evaluation. Note that the 1st and 2nd derivatives are always computed, although we
+ * only return and use the 1st derivatives if adaptive patches are used. This could
+ * perhaps be optimized.
+ */
+
+#if defined(FVAR_EVALUATION)
+/* Accumulate into `dst` the face-varying value of patch `patch_index` at (u, v): the weighted
+ * sum of the patch control values, read from the source buffer at `src_offset`.
+ * `dst` is only added to, the caller is expected to initialize it. */
+void evaluate_patches_limits(int patch_index, float u, float v, inout vec2 dst)
+{
+  OsdPatchCoord patch_coord = GetPatchCoord(patch_index, u, v);
+  OsdPatchArray patch_array = GetPatchArray(patch_coord.arrayIndex);
+  OsdPatchParam patch_param = GetPatchParam(patch_coord.patchIndex);
+
+  int patch_type = OsdPatchParamIsRegular(patch_param) ? patch_array.regDesc : patch_array.desc;
+
+  /* Only the point weights are used, the derivative weights are outputs of the basis call. */
+  float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+  int point_count = OsdEvaluatePatchBasis(
+      patch_type, patch_param, patch_coord.s, patch_coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+  int local_patch = patch_coord.patchIndex - patch_array.primitiveIdBase;
+  int first_cv = patch_array.indexBase + patch_array.stride * local_patch;
+
+  for (int cv = 0; cv < point_count; ++cv) {
+    int src_index = patchIndexBuffer[first_cv + cv];
+    dst += read_vec2(src_offset + src_index) * wP[cv];
+  }
+}
+#else
+/* Accumulate into `dst`, `du` and `dv` the position and first derivatives of patch
+ * `patch_index` at (u, v): the weighted sums of the patch control vertices.
+ * The outputs are only added to, the caller is expected to initialize them. */
+void evaluate_patches_limits(
+    int patch_index, float u, float v, inout vec3 dst, inout vec3 du, inout vec3 dv)
+{
+  OsdPatchCoord patch_coord = GetPatchCoord(patch_index, u, v);
+  OsdPatchArray patch_array = GetPatchArray(patch_coord.arrayIndex);
+  OsdPatchParam patch_param = GetPatchParam(patch_coord.patchIndex);
+
+  int patch_type = OsdPatchParamIsRegular(patch_param) ? patch_array.regDesc : patch_array.desc;
+
+  /* The second derivative weights are outputs of the basis call, but are not used. */
+  float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+  int point_count = OsdEvaluatePatchBasis(
+      patch_type, patch_param, patch_coord.s, patch_coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+  int local_patch = patch_coord.patchIndex - patch_array.primitiveIdBase;
+  int first_cv = patch_array.indexBase + patch_array.stride * local_patch;
+
+  for (int cv = 0; cv < point_count; ++cv) {
+    vec3 control_vertex = read_vec3(patchIndexBuffer[first_cv + cv]);
+
+    dst += control_vertex * wP[cv];
+    du += control_vertex * wDu[cv];
+    dv += control_vertex * wDv[cv];
+  }
+}
+#endif
+
+/* ------------------------------------------------------------------------------
+ * Entry point.
+ */
+
+#if defined(FVAR_EVALUATION)
+/* Evaluate the face-varying value of each of the four loops of a subdivided quad and store it at
+ * `dst_offset + loop_index`. */
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    uint loop_index = first_loop + corner;
+
+    BlenderPatchCoord patch_co = patch_coords[loop_index];
+    vec2 uv = decode_uv(patch_co.encoded_uv);
+
+    /* The evaluation accumulates, so start from zero. */
+    vec2 fvar = vec2(0.0);
+    evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, fvar);
+
+    output_fvar[dst_offset + loop_index] = fvar;
+  }
+}
+#elif defined(FDOTS_EVALUATION)
+/* True when any bit of the select mask is set in the extra data of the coarse face. */
+bool is_face_selected(uint coarse_quad_index)
+{
+  uint select_bits = extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask;
+  return select_bits != 0;
+}
+
+/* True when any bit of the active mask is set in the extra data of the coarse face. */
+bool is_face_active(uint coarse_quad_index)
+{
+  uint active_bits = extra_coarse_face_data[coarse_quad_index] & coarse_face_active_mask;
+  return active_bits != 0;
+}
+
+/* Flag stored with the face dot normal: -1.0 for the active face, 1.0 for a selected face,
+ * 0.0 otherwise. The active state takes precedence over the selected state. */
+float get_face_flag(uint coarse_quad_index)
+{
+  float flag = 0.0;
+
+  if (is_face_active(coarse_quad_index)) {
+    flag = -1.0;
+  }
+  else if (is_face_selected(coarse_quad_index)) {
+    flag = 1.0;
+  }
+
+  return flag;
+}
+
+/* Evaluate the face dot of one coarse face: its position, its normal (from the cross product of
+ * the evaluated derivatives) with the face flag, and its index. */
+void main()
+{
+  /* We execute for each coarse quad. */
+  uint coarse_quad_index = get_global_invocation_index();
+  if (coarse_quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  BlenderPatchCoord patch_co = patch_coords[coarse_quad_index];
+  vec2 uv = decode_uv(patch_co.encoded_uv);
+
+  /* The evaluation accumulates, so start from zero. */
+  vec3 pos = vec3(0.0);
+  vec3 du = vec3(0.0);
+  vec3 dv = vec3(0.0);
+  evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv);
+  vec3 nor = normalize(cross(du, dv));
+
+  /* The output formats are unpadded, hence the per component copies. */
+  output_verts[coarse_quad_index] = FDotVert(pos.x, pos.y, pos.z);
+  output_nors[coarse_quad_index] = FDotNor(nor.x, nor.y, nor.z, get_face_flag(coarse_quad_index));
+  output_indices[coarse_quad_index] = coarse_quad_index;
+}
+#else
+/* Evaluate the position of each of the four loops of a subdivided quad and store it as a
+ * PosNorLoop. The normal is the cross product of the evaluated derivatives with LIMIT_NORMALS,
+ * zero otherwise. */
+void main()
+{
+  /* We execute for each quad. */
+  uint quad_index = get_global_invocation_index();
+  if (quad_index >= total_dispatch_size) {
+    return;
+  }
+
+  uint first_loop = quad_index * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    uint loop_index = first_loop + corner;
+
+    BlenderPatchCoord patch_co = patch_coords[loop_index];
+    vec2 uv = decode_uv(patch_co.encoded_uv);
+
+    /* The evaluation accumulates, so start from zero. */
+    vec3 pos = vec3(0.0);
+    vec3 du = vec3(0.0);
+    vec3 dv = vec3(0.0);
+    evaluate_patches_limits(patch_co.patch_index, uv.x, uv.y, pos, du, dv);
+
+#  if defined(LIMIT_NORMALS)
+    vec3 nor = normalize(cross(du, dv));
+#  else
+    /* This will be computed later. */
+    vec3 nor = vec3(0.0);
+#  endif
+
+    /* Loops without an original vertex get a flag of -1.
+     * NOTE(review): the flag is unsigned and later converted with float(), so this stores a
+     * large positive value rather than -1.0; confirm this is what the consumers expect. */
+    uint flag = 0;
+    if (input_vert_origindex[loop_index] == -1) {
+      flag = -1;
+    }
+
+    PosNorLoop vertex_data;
+    set_vertex_pos(vertex_data, pos);
+    set_vertex_nor(vertex_data, nor, flag);
+    output_verts[loop_index] = vertex_data;
+  }
+}
+#endif
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
new file mode 100644
index 00000000000..6c76cd41ca4
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edge_fac_comp.glsl
@@ -0,0 +1,97 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Position and normal of every subdivided loop, read through get_vertex_pos() and
+ * get_vertex_nor(). Indexed by loop index (quad_index * 4 + corner). */
+layout(std430, binding = 0) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[];
+};
+
+/* Edge index of every subdivided loop. A value of -1 combined with `optimal_display` makes
+ * compute_line_factor() return a factor of 0. */
+layout(std430, binding = 1) readonly buffer inputEdgeIndex
+{
+ uint input_edge_index[];
+};
+
+/* Resulting edge factors: one float per loop when GPU_AMD_DRIVER_BYTE_BUG is defined, otherwise
+ * one uint per quad holding the four factors of its loops packed as bytes (see write_vec4). */
+layout(std430, binding = 2) writeonly buffer outputEdgeFactors
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+ float output_edge_fac[];
+#else
+ uint output_edge_fac[];
+#endif
+};
+
+/* Store the four edge factors of one quad in `output_edge_fac`.
+ *
+ * With GPU_AMD_DRIVER_BYTE_BUG defined, `index` is the first loop of the quad and each factor is
+ * written as a float at `index + corner`. Otherwise `index` is the quad index and the four
+ * factors are quantized to bytes and packed into a single uint.
+ *
+ * NOTE(review): the packed path assumes the buffer is consumed as one unsigned byte per loop on
+ * a little-endian device - confirm against the vertex format declared on the host side. */
+void write_vec4(uint index, vec4 edge_facs)
+{
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+  for (uint i = 0; i < 4; i++) {
+    output_edge_fac[index + i] = edge_facs[i];
+  }
+#else
+  /* Use same scaling as in extract_edge_fac_iter_poly_mesh. */
+  uint a = uint(clamp(edge_facs.x * 253.0 + 1.0, 0.0, 255.0));
+  uint b = uint(clamp(edge_facs.y * 253.0 + 1.0, 0.0, 255.0));
+  uint c = uint(clamp(edge_facs.z * 253.0 + 1.0, 0.0, 255.0));
+  uint d = uint(clamp(edge_facs.w * 253.0 + 1.0, 0.0, 255.0));
+  /* The byte at the lowest address of a little-endian uint is its least significant one, so the
+   * factor of corner 0 goes into bits 0-7 and the factor of corner 3 into bits 24-31. Packing
+   * the other way round assigns the factors to the loops of the quad in reverse order. */
+  uint packed_edge_fac = (d << 24) | (c << 16) | (b << 8) | a;
+  output_edge_fac[index] = packed_edge_fac;
+#endif
+}
+
+/* From extract_mesh_vbo_edge_fac.cc, keep in sync!
+ *
+ * Returns a factor in [0, 1] for the edge going from `v_co` to `v_next_co`, derived from the
+ * face normal `f_no` and the normal `v_no` of the first vertex. */
+float loop_edge_factor_get(vec3 f_no, vec3 v_co, vec3 v_no, vec3 v_next_co)
+{
+  /* Unit vector perpendicular to both the vertex normal and the edge. */
+  vec3 edge_side = normalize(cross(v_no, v_next_co - v_co));
+  /* Re-scale to the slider range before saturating. */
+  float fac = abs(dot(edge_side, f_no)) * (1.0 / 0.065);
+  return clamp(fac, 0.0, 1.0);
+}
+
+/* Compute the edge factor of the edge starting at corner `corner_index` of the quad whose first
+ * loop is `start_loop_index`. `face_normal` is the normal of that quad. */
+float compute_line_factor(uint start_loop_index, uint corner_index, vec3 face_normal)
+{
+  uint cur_loop = start_loop_index + corner_index;
+
+  /* With optimal display enabled, loops whose edge index is -1 get a factor of zero. */
+  if (optimal_display && input_edge_index[cur_loop] == uint(-1)) {
+    return 0.0;
+  }
+
+  /* Only the corner is wrapped (mod 4), so that the last corner (3) connects back to the first
+   * vertex of the same quad. */
+  uint next_loop = start_loop_index + (corner_index + 1) % 4;
+
+  PosNorLoop cur_vert = pos_nor[cur_loop];
+  return loop_edge_factor_get(face_normal,
+                              get_vertex_pos(cur_vert),
+                              get_vertex_nor(cur_vert),
+                              get_vertex_pos(pos_nor[next_loop]));
+}
+
+/* Compute the edge factors of the four edges of one subdivided quad and write them out. One
+ * invocation handles one quad. */
+void main()
+{
+  uint quad = get_global_invocation_index();
+  if (quad >= total_dispatch_size) {
+    return;
+  }
+
+  /* The four loops of a quad are stored consecutively, starting at quad * 4. */
+  uint first_loop = quad * 4;
+
+  /* The face normal is needed to compute the dihedral edge angle. It is derived from the first
+   * three corners of the quad. */
+  vec3 p0 = get_vertex_pos(pos_nor[first_loop + 0]);
+  vec3 p1 = get_vertex_pos(pos_nor[first_loop + 1]);
+  vec3 p2 = get_vertex_pos(pos_nor[first_loop + 2]);
+  vec3 face_normal = normalize(cross(p1 - p0, p2 - p0));
+
+  /* One factor per corner, for the edge starting at that corner. */
+  vec4 edge_facs = vec4(compute_line_factor(first_loop, 0u, face_normal),
+                        compute_line_factor(first_loop, 1u, face_normal),
+                        compute_line_factor(first_loop, 2u, face_normal),
+                        compute_line_factor(first_loop, 3u, face_normal));
+
+#ifdef GPU_AMD_DRIVER_BYTE_BUG
+  /* One float per loop: the output is addressed by loop. */
+  uint out_index = first_loop;
+#else
+  /* When packed into bytes, the index is the same as for the quad. */
+  uint out_index = quad;
+#endif
+  write_vec4(out_index, edge_facs);
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
new file mode 100644
index 00000000000..ea73b9482d3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_angle_comp.glsl
@@ -0,0 +1,80 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Position and normal of every subdivided loop, only the positions are read here. Indexed by
+ * loop index. */
+layout(std430, binding = 0) readonly buffer inputVerts
+{
+ PosNorLoop pos_nor[];
+};
+
+/* UV coordinates, indexed by `src_offset` + loop index. */
+layout(std430, binding = 1) readonly buffer inputUVs
+{
+ vec2 uvs[];
+};
+
+/* Mirror of #UVStretchAngle in the C++ code, but using floats until proper data compression
+ * is implemented for all subdivision data. */
+struct UVStretchAngle {
+ /* Angle between the two 3D edges meeting at the loop, divided by pi. */
+ float angle;
+ /* Direction angles (atan) of the two 2D UV edges meeting at the loop, divided by pi. */
+ float uv_angle0;
+ float uv_angle1;
+};
+
+/* One record per subdivided loop, indexed by loop index. */
+layout(std430, binding = 2) writeonly buffer outputStretchAngles
+{
+ UVStretchAngle uv_stretches[];
+};
+
+/* Pi and its reciprocal. */
+#define M_PI 3.1415926535897932
+#define M_1_PI 0.31830988618379067154
+
+/* Adapted from BLI_math_vector.h
+ *
+ * Angle between two vectors that are expected to be normalized. Gives the same result as
+ * acos(dot(v1, v2)), but is more accurate. */
+float angle_normalized_v3v3(vec3 v1, vec3 v2)
+{
+  if (dot(v1, v2) >= 0.0) {
+    /* Angle of at most 90 degrees: use the length of the difference. */
+    return 2.0 * asin(length(v1 - v2) / 2.0);
+  }
+  /* Angle above 90 degrees: measure against the flipped vector and take the supplement. */
+  return M_PI - 2.0 * asin(length(v1 + v2) / 2.0);
+}
+
+/* Compute the UV stretch angle data of the four loops of one subdivided quad. One invocation
+ * handles one quad. */
+void main()
+{
+  uint quad = get_global_invocation_index();
+  if (quad >= total_dispatch_size) {
+    return;
+  }
+
+  /* The four loops of a quad are stored consecutively. */
+  uint first_loop = quad * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    /* Neighboring corners wrap around inside the quad. */
+    uint cur = first_loop + corner;
+    uint next = first_loop + (corner + 1) % 4;
+    uint prev = first_loop + (corner + 3) % 4;
+
+    /* Normalized 2D edge vectors from the UVs: previous minus current, and current minus next. */
+    vec2 uv_cur = uvs[src_offset + cur];
+    vec2 uv_edge0 = normalize(uvs[src_offset + prev] - uv_cur);
+    vec2 uv_edge1 = normalize(uv_cur - uvs[src_offset + next]);
+
+    /* Normalized 3D edge vectors from the positions, built the same way. */
+    vec3 co_cur = get_vertex_pos(pos_nor[cur]);
+    vec3 co_edge0 = normalize(get_vertex_pos(pos_nor[prev]) - co_cur);
+    vec3 co_edge1 = normalize(co_cur - get_vertex_pos(pos_nor[next]));
+
+    /* This logic is adapted from #edituv_get_edituv_stretch_angle. Keep in sync!
+     * All angles are divided by pi. The constructor arguments follow the member order of
+     * UVStretchAngle: angle, uv_angle0, uv_angle1. */
+    float uv_angle0 = atan(uv_edge0.y, uv_edge0.x) * M_1_PI;
+    float uv_angle1 = atan(uv_edge1.y, uv_edge1.x) * M_1_PI;
+    float angle = angle_normalized_v3v3(co_edge0, co_edge1) * M_1_PI;
+
+    uv_stretches[cur] = UVStretchAngle(angle, uv_angle0, uv_angle1);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
new file mode 100644
index 00000000000..e897fb3f3c0
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl
@@ -0,0 +1,31 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* One stretch area value per coarse polygon, indexed by coarse polygon index. */
+layout(std430, binding = 1) readonly buffer inputCoarseData
+{
+ float coarse_stretch_area[];
+};
+
+/* One stretch area value per subdivided loop, indexed by loop index (quad_index * 4 + corner). */
+layout(std430, binding = 2) writeonly buffer outputSubdivData
+{
+ float subdiv_stretch_area[];
+};
+
+/* Copy the stretch area of a coarse polygon to the four loops of one of its subdivided quads.
+ * One invocation handles one quad. */
+void main()
+{
+  uint quad = get_global_invocation_index();
+  if (quad >= total_dispatch_size) {
+    return;
+  }
+
+  /* Look up the value of the coarse polygon this quad was generated from. */
+  uint coarse_poly = coarse_polygon_index_from_subdiv_quad_index(quad, coarse_poly_count);
+  float area = coarse_stretch_area[coarse_poly];
+
+  /* The four loops of a quad are stored consecutively, starting at quad * 4. */
+  uint first_loop = quad * 4;
+  for (uint corner = 0; corner < 4; corner++) {
+    subdiv_stretch_area[first_loop + corner] = area;
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
new file mode 100644
index 00000000000..41a8df3cf82
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl
@@ -0,0 +1,52 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Position and normal of every subdivided loop, indexed by loop index. */
+layout(std430, binding = 1) readonly buffer inputVertexData
+{
+ PosNorLoop pos_nor[];
+};
+
+/* One word of flags per coarse polygon, tested against `coarse_face_smooth_mask` to tell smooth
+ * faces from flat ones. */
+layout(std430, binding = 2) readonly buffer extraCoarseFaceData
+{
+ uint extra_coarse_face_data[];
+};
+
+/* One normal per subdivided loop, indexed by loop index.
+ * NOTE(review): under std430 an array of vec3 has a stride of 16 bytes, not 12 - confirm that
+ * the buffer bound here stores four floats per loop. */
+layout(std430, binding = 3) writeonly buffer outputLoopNormals
+{
+ vec3 output_lnor[];
+};
+
+/* Write the loop normals of one subdivided quad: the vertex normals for smooth faces, a single
+ * face normal for flat faces. One invocation handles one quad. */
+void main()
+{
+  uint quad = get_global_invocation_index();
+  if (quad >= total_dispatch_size) {
+    return;
+  }
+
+  /* The four loops of a quad are stored consecutively, starting at quad * 4. */
+  uint first_loop = quad * 4;
+
+  /* Shading mode is a property of the coarse polygon this quad was generated from. */
+  uint coarse_poly = coarse_polygon_index_from_subdiv_quad_index(quad, coarse_poly_count);
+  bool is_smooth = (extra_coarse_face_data[coarse_poly] & coarse_face_smooth_mask) != 0;
+
+  if (!is_smooth) {
+    /* Flat shaded face: compute the face normal from the triangle inscribed in the first three
+     * corners and use it for all four loops. */
+    vec3 p0 = get_vertex_pos(pos_nor[first_loop + 0]);
+    vec3 p1 = get_vertex_pos(pos_nor[first_loop + 1]);
+    vec3 p2 = get_vertex_pos(pos_nor[first_loop + 2]);
+    vec3 flat_normal = normalize(cross(p1 - p0, p2 - p0));
+
+    for (uint corner = 0; corner < 4; corner++) {
+      output_lnor[first_loop + corner] = flat_normal;
+    }
+    return;
+  }
+
+  /* Smooth face: every loop takes the normal stored with its vertex. */
+  for (uint corner = 0; corner < 4; corner++) {
+    uint loop = first_loop + corner;
+    output_lnor[loop] = get_vertex_nor(pos_nor[loop]);
+  }
+}
diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
new file mode 100644
index 00000000000..7182ce57ad3
--- /dev/null
+++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
@@ -0,0 +1,47 @@
+
+/* To be compiled with common_subdiv_lib.glsl */
+
+/* Sculpt data written for every subdivided loop: the face set color and the mask value. */
+struct SculptData {
+ uint face_set_color;
+ float mask;
+};
+
+/* Mask value of every subdivided loop. Only read when `has_sculpt_mask` is set. */
+layout(std430, binding = 0) readonly restrict buffer sculptMask
+{
+ float sculpt_mask[];
+};
+
+/* Face set color of every subdivided loop, indexed by loop index. */
+layout(std430, binding = 1) readonly restrict buffer faceSetColor
+{
+ uint face_set_color[];
+};
+
+/* Combined output, one record per subdivided loop, indexed by loop index. */
+layout(std430, binding = 2) writeonly restrict buffer sculptData
+{
+ SculptData sculpt_data[];
+};
+
+/* Interleave the face set color and the sculpt mask of the four loops of one subdivided quad
+ * into `sculpt_data`. One invocation handles one quad. */
+void main()
+{
+  uint quad = get_global_invocation_index();
+  if (quad >= total_dispatch_size) {
+    return;
+  }
+
+  /* The four loops of a quad are stored consecutively. */
+  uint first_loop = quad * 4;
+
+  for (uint corner = 0; corner < 4; corner++) {
+    uint loop = first_loop + corner;
+
+    /* The mask buffer is only read when a mask is available, otherwise the mask is zero. */
+    float loop_mask = has_sculpt_mask ? sculpt_mask[loop] : 0.0;
+
+    /* Constructor arguments follow the member order of SculptData: face_set_color, mask. */
+    sculpt_data[loop] = SculptData(face_set_color[loop], loop_mask);
+  }
+}