Mesh Batch Cache: Refactor + Multithread

For clarity's sake, the batch cache now exclusively uses per-loop attributes. While this wastes a bit of VRAM (in the few cases where per-vertex attributes would be enough), it reduces the complexity and the overall number of VBOs to update in general situations. This patch also makes the filling of VertexBuffers multithreaded, which makes updating dense meshes a bit faster. The main bottleneck is the IndexBuffers update, which cannot be multithreaded efficiently (it has to increment a counter and/or do a final sorting pass). We introduce the concept of "extract" functions/steps. Each extract function is executed in its own thread and, where possible, uses multiple threads to loop over all elements. Reviewed By: brecht Differential Revision: http://developer.blender.org/D5424
author: Clément Foucault <foucault.clem@gmail.com> 2019-07-14 17:49:44 +0300
committer: Clément Foucault <foucault.clem@gmail.com> 2019-08-14 20:05:26 +0300
commit: 9c010c44f4201ab114b3facc69d0343525a1779f (patch)
tree: 744271becd24cead6c900e4f023d8c0bfa6138b6 /source/blender/gpu/GPU_vertex_format.h
parent: 45a45f7d66211e82a3a3288782ad9523e8fdc516 (diff)
1 files changed, 59 insertions, 3 deletions
diff --git a/source/blender/gpu/GPU_vertex_format.h b/source/blender/gpu/GPU_vertex_format.h
index 68608a98a79..dc60c52122c 100644
--- a/source/blender/gpu/GPU_vertex_format.h
+++ b/source/blender/gpu/GPU_vertex_format.h
@@ -31,7 +31,7 @@
 #include "BLI_assert.h"
 
 #define GPU_VERT_ATTR_MAX_LEN 16
-#define GPU_VERT_ATTR_MAX_NAMES 5
+#define GPU_VERT_ATTR_MAX_NAMES 6
 #define GPU_VERT_ATTR_NAME_AVERAGE_LEN 11
 #define GPU_VERT_ATTR_NAMES_BUF_LEN ((GPU_VERT_ATTR_NAME_AVERAGE_LEN + 1) * GPU_VERT_ATTR_MAX_LEN)
 
@@ -88,6 +88,8 @@ typedef struct GPUVertFormat {
   uint packed : 1;
   /** Current offset in names[]. */
   uint name_offset : 8;
+  /** Store each attrib in one contiguous buffer region. */
+  uint deinterleaved : 1;
 
   GPUVertAttr attrs[GPU_VERT_ATTR_MAX_LEN];
   char names[GPU_VERT_ATTR_NAMES_BUF_LEN];
@@ -104,6 +106,8 @@ uint GPU_vertformat_attr_add(
     GPUVertFormat *, const char *name, GPUVertCompType, uint comp_len, GPUVertFetchMode);
 void GPU_vertformat_alias_add(GPUVertFormat *, const char *alias);
 
+void GPU_vertformat_deinterleave(GPUVertFormat *format);
+
 int GPU_vertformat_attr_id_get(const GPUVertFormat *, const char *name);
 
 BLI_INLINE const char *GPU_vertformat_attr_name_get(const GPUVertFormat *format,
@@ -122,7 +126,59 @@ typedef struct GPUPackedNormal {
   int w : 2; /* 0 by default, can manually set to { -2, -1, 0, 1 } */
 } GPUPackedNormal;
 
-GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3]);
-GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3]);
+/* OpenGL ES packs in a different order than desktop GL, but component conversion is the same.
+ * Of the code here, only struct GPUPackedNormal needs to change. */
+
+#define SIGNED_INT_10_MAX 511
+#define SIGNED_INT_10_MIN -512
+
+BLI_INLINE int clampi(int x, int min_allowed, int max_allowed)
+{
+#if TRUST_NO_ONE
+  assert(min_allowed <= max_allowed);
+#endif
+  if (x < min_allowed) {
+    return min_allowed;
+  }
+  else if (x > max_allowed) {
+    return max_allowed;
+  }
+  else {
+    return x;
+  }
+}
+
+BLI_INLINE int gpu_convert_normalized_f32_to_i10(float x)
+{
+  int qx = x * 511.0f;
+  return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX);
+}
+
+BLI_INLINE int gpu_convert_i16_to_i10(short x)
+{
+  /* 16-bit signed --> 10-bit signed */
+  /* TODO: round? */
+  return x >> 6;
+}
+
+BLI_INLINE GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3])
+{
+  GPUPackedNormal n = {
+      .x = gpu_convert_normalized_f32_to_i10(data[0]),
+      .y = gpu_convert_normalized_f32_to_i10(data[1]),
+      .z = gpu_convert_normalized_f32_to_i10(data[2]),
+  };
+  return n;
+}
+
+BLI_INLINE GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3])
+{
+  GPUPackedNormal n = {
+      .x = gpu_convert_i16_to_i10(data[0]),
+      .y = gpu_convert_i16_to_i10(data[1]),
+      .z = gpu_convert_i16_to_i10(data[2]),
+  };
+  return n;
+}
 
 #endif /* __GPU_VERTEX_FORMAT_H__ */
author	Clément Foucault <foucault.clem@gmail.com>	2019-07-14 17:49:44 +0300
committer	Clément Foucault <foucault.clem@gmail.com>	2019-08-14 20:05:26 +0300
commit	9c010c44f4201ab114b3facc69d0343525a1779f (patch)
tree	744271becd24cead6c900e4f023d8c0bfa6138b6 /source/blender/gpu/GPU_vertex_format.h
parent	45a45f7d66211e82a3a3288782ad9523e8fdc516 (diff)