Diffstat (limited to 'source/blender/gpu/intern/gpu_vertex_format.cc')
-rw-r--r-- | source/blender/gpu/intern/gpu_vertex_format.cc | 119
1 file changed, 112 insertions, 7 deletions
diff --git a/source/blender/gpu/intern/gpu_vertex_format.cc b/source/blender/gpu/intern/gpu_vertex_format.cc
index 59ae862aa51..897e80293bf 100644
--- a/source/blender/gpu/intern/gpu_vertex_format.cc
+++ b/source/blender/gpu/intern/gpu_vertex_format.cc
@@ -8,6 +8,9 @@
  */

 #include "GPU_vertex_format.h"
+#include "GPU_capabilities.h"
+
+#include "gpu_shader_create_info.hh"
 #include "gpu_shader_private.hh"
 #include "gpu_vertex_format_private.h"

@@ -25,6 +28,7 @@
 #endif

 using namespace blender::gpu;
+using namespace blender::gpu::shader;

 void GPU_vertformat_clear(GPUVertFormat *format)
 {
@@ -66,7 +70,7 @@ static uint attr_size(const GPUVertAttr *a)
   return a->comp_len * comp_size(static_cast<GPUVertCompType>(a->comp_type));
 }

-static uint attr_align(const GPUVertAttr *a)
+static uint attr_align(const GPUVertAttr *a, uint minimum_stride)
 {
   if (a->comp_type == GPU_COMP_I10) {
     return 4; /* always packed as 10_10_10_2 */
@@ -76,7 +80,10 @@ static uint attr_align(const GPUVertAttr *a)
     return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */
   }

-  return c; /* most fetches are ok if components are naturally aligned */
+  /* Most fetches are ok if components are naturally aligned.
+   * However, in Metal, the minimum supported per-vertex stride is 4,
+   * so we must query the GPU and pad out the size accordingly. */
+  return max_ii(minimum_stride, c);
 }

 uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len)
@@ -306,7 +313,7 @@ static void show_pack(uint a_idx, uint size, uint pad)
 }
 #endif

-void VertexFormat_pack(GPUVertFormat *format)
+static void VertexFormat_pack_impl(GPUVertFormat *format, uint minimum_stride)
 {
   GPUVertAttr *a0 = &format->attrs[0];
   a0->offset = 0;
@@ -318,7 +325,7 @@ void VertexFormat_pack(GPUVertFormat *format)

   for (uint a_idx = 1; a_idx < format->attr_len; a_idx++) {
     GPUVertAttr *a = &format->attrs[a_idx];
-    uint mid_padding = padding(offset, attr_align(a));
+    uint mid_padding = padding(offset, attr_align(a, minimum_stride));
     offset += mid_padding;
     a->offset = offset;
     offset += a->size;
@@ -328,7 +335,7 @@ void VertexFormat_pack(GPUVertFormat *format)
 #endif
   }

-  uint end_padding = padding(offset, attr_align(a0));
+  uint end_padding = padding(offset, attr_align(a0, minimum_stride));

 #if PACK_DEBUG
   show_pack(0, 0, end_padding);
@@ -338,8 +345,106 @@ void VertexFormat_pack(GPUVertFormat *format)
   format->packed = true;
 }

+void VertexFormat_pack(GPUVertFormat *format)
+{
+  /* Perform standard vertex packing, ensuring vertex format satisfies
+   * minimum stride requirements for vertex assembly. */
+  VertexFormat_pack_impl(format, GPU_minimum_per_vertex_stride());
+}
+
+void VertexFormat_texture_buffer_pack(GPUVertFormat *format)
+{
+  /* Validates packing for vertex formats used with texture buffers.
+   * In these cases, there must only be a single vertex attribute.
+   * This attribute should be tightly packed without padding, to ensure
+   * it aligns with the backing texture data format, skipping
+   * minimum per-vertex stride, which mandates 4-byte alignment in Metal.
+   * This additional alignment padding caused smaller data types, e.g. U16,
+   * to mis-align. */
+  BLI_assert_msg(format->attr_len == 1,
+                 "Texture buffer mode should only use a single vertex attribute.");
+
+  /* Pack vertex format without minimum stride, as this is not required by texture buffers. */
+  VertexFormat_pack_impl(format, 1);
+}
+
+static uint component_size_get(const Type gpu_type)
+{
+  switch (gpu_type) {
+    case Type::VEC2:
+    case Type::IVEC2:
+    case Type::UVEC2:
+      return 2;
+    case Type::VEC3:
+    case Type::IVEC3:
+    case Type::UVEC3:
+      return 3;
+    case Type::VEC4:
+    case Type::IVEC4:
+    case Type::UVEC4:
+      return 4;
+    case Type::MAT3:
+      return 12;
+    case Type::MAT4:
+      return 16;
+    default:
+      return 1;
+  }
+}
+
+static void recommended_fetch_mode_and_comp_type(Type gpu_type,
+                                                 GPUVertCompType *r_comp_type,
+                                                 GPUVertFetchMode *r_fetch_mode)
+{
+  switch (gpu_type) {
+    case Type::FLOAT:
+    case Type::VEC2:
+    case Type::VEC3:
+    case Type::VEC4:
+    case Type::MAT3:
+    case Type::MAT4:
+      *r_comp_type = GPU_COMP_F32;
+      *r_fetch_mode = GPU_FETCH_FLOAT;
+      break;
+    case Type::INT:
+    case Type::IVEC2:
+    case Type::IVEC3:
+    case Type::IVEC4:
+      *r_comp_type = GPU_COMP_I32;
+      *r_fetch_mode = GPU_FETCH_INT;
+      break;
+    case Type::UINT:
+    case Type::UVEC2:
+    case Type::UVEC3:
+    case Type::UVEC4:
+      *r_comp_type = GPU_COMP_U32;
+      *r_fetch_mode = GPU_FETCH_INT;
+      break;
+    default:
+      BLI_assert(0);
+  }
+}
+
 void GPU_vertformat_from_shader(GPUVertFormat *format, const struct GPUShader *gpushader)
 {
-  const Shader *shader = reinterpret_cast<const Shader *>(gpushader);
-  shader->vertformat_from_shader(format);
+  GPU_vertformat_clear(format);
+
+  uint attr_len = GPU_shader_get_attribute_len(gpushader);
+  int location_test = 0, attrs_added = 0;
+  while (attrs_added < attr_len) {
+    char name[256];
+    Type gpu_type;
+    if (!GPU_shader_get_attribute_info(gpushader, location_test++, name, (int *)&gpu_type)) {
+      continue;
+    }
+
+    GPUVertCompType comp_type;
+    GPUVertFetchMode fetch_mode;
+    recommended_fetch_mode_and_comp_type(gpu_type, &comp_type, &fetch_mode);
+
+    int comp_len = component_size_get(gpu_type);
+
+    GPU_vertformat_attr_add(format, name, comp_type, comp_len, fetch_mode);
+    attrs_added++;
+  }
 }
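Note: below is a minimal usage sketch of the packing behavior introduced by this patch. It is illustrative only: the attribute name "data" and the single-component U16 layout are assumptions, not taken from the patch, and the pack functions are internal (normally invoked by the vertex buffer code rather than called directly). Standard packing pads the stride up to GPU_minimum_per_vertex_stride() (4 bytes on Metal), while the texture-buffer variant keeps the single attribute tightly packed so it matches the backing texture format.

#include "GPU_vertex_format.h"
#include "gpu_vertex_format_private.h"

/* Hypothetical single U16 attribute, e.g. an index or id stream
 * sourced through a texture buffer. */
GPUVertFormat format;
GPU_vertformat_clear(&format);
GPU_vertformat_attr_add(&format, "data", GPU_COMP_U16, 1, GPU_FETCH_INT);

/* Standard path: attr_align() clamps to GPU_minimum_per_vertex_stride(),
 * so on Metal the resulting format.stride is 4. */
VertexFormat_pack(&format);

/* Texture buffer path: no minimum-stride padding, so the 2-byte U16
 * attribute stays tightly packed and format.stride is 2. */
VertexFormat_texture_buffer_pack(&format);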