From 922d53a791d53b77e5ffcf65003555fae0a0e883 Mon Sep 17 00:00:00 2001
From: Jason Fielder <jason_apple>
Date: Wed, 30 Mar 2022 20:24:39 +0200
Subject: Metal: Add alternative support for GPU_PRIM_TRI_FAN/LINE_LOOP for
 Metal backend.

- Metal uniform array compatibility in DRW module.
- Guard OpenGL-specific workarounds and flushes behind a GPU_type_matches_ex API guard. Add further render boundaries for render paths called outside of the main loop.

Authored by Apple: Michael Parkin-White

Ref: T96261

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D14438
---
 source/blender/draw/intern/draw_cache.c          |  5 +-
 source/blender/draw/intern/draw_manager.c        | 29 +++++++--
 source/blender/draw/intern/draw_manager_data.c   |  6 +-
 source/blender/draw/intern/draw_manager_exec.c   | 75 ++++++++++++++++++++++--
 source/blender/draw/intern/draw_manager_shader.c |  6 +-
 5 files changed, 109 insertions(+), 12 deletions(-)

(limited to 'source/blender/draw/intern')

diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 8fc97ddcfc2..1c2a580e26d 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -26,6 +26,7 @@
 
 #include "GPU_batch.h"
 #include "GPU_batch_utils.h"
+#include "GPU_capabilities.h"
 
 #include "MEM_guardedalloc.h"
 
@@ -395,12 +396,12 @@ GPUBatch *DRW_cache_quad_get(void)
 
     int v = 0;
     int flag = VCLASS_EMPTY_SCALED;
-    const float p[4][2] = {{-1.0f, -1.0f}, {-1.0f, 1.0f}, {1.0f, 1.0f}, {1.0f, -1.0f}};
+    const float p[4][2] = {{-1.0f, 1.0f}, {1.0f, 1.0f}, {-1.0f, -1.0f}, {1.0f, -1.0f}};
     for (int a = 0; a < 4; a++) {
       GPU_vertbuf_vert_set(vbo, v++, &(Vert){{p[a][0], p[a][1], 0.0f}, flag});
     }
 
-    SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_FAN, vbo, NULL, GPU_BATCH_OWNS_VBO);
+    SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO);
   }
   return SHC.drw_quad;
 }
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 39ae01697a1..75c27937f25 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -55,6 +55,7 @@
 #include "GPU_framebuffer.h"
 #include "GPU_immediate.h"
 #include "GPU_matrix.h"
+#include "GPU_platform.h"
 #include "GPU_shader_shared.h"
 #include "GPU_state.h"
 #include "GPU_uniform_buffer.h"
@@ -1706,7 +1707,9 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
   drw_engines_draw_scene();
 
   /* Fix 3D view "lagging" on APPLE and WIN32+NVIDIA. (See T56996, T61474) */
-  GPU_flush();
+  if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+    GPU_flush();
+  }
 
   DRW_stats_reset();
 
@@ -1938,6 +1941,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
   };
   drw_context_state_init();
 
+  /* Begin GPU workload Boundary */
+  GPU_render_begin();
+
   const int size[2] = {engine->resolution_x, engine->resolution_y};
 
   drw_manager_init(&DST, NULL, size);
@@ -1993,6 +1999,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
 
   /* Reset state after drawing */
   DRW_state_reset();
+
+  /* End GPU workload Boundary */
+  GPU_render_end();
 }
 
 void DRW_render_object_iter(
@@ -2072,7 +2081,10 @@ void DRW_custom_pipeline(DrawEngineType *draw_engine_type,
    * resources as the main thread (viewport) may lead to data
    * races and undefined behavior on certain drivers. Using
    * GPU_finish to sync seems to fix the issue. (see T62997) */
-  GPU_finish();
+  eGPUBackendType type = GPU_backend_get_type();
+  if (type == GPU_BACKEND_OPENGL) {
+    GPU_finish();
+  }
 
   drw_manager_exit(&DST);
 }
@@ -2173,7 +2185,9 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph,
   drw_engines_draw_scene();
 
   /* Fix 3D view being "laggy" on macos and win+nvidia. (See T56996, T61474) */
-  GPU_flush();
+  if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+    GPU_flush();
+  }
 
   if (DST.draw_ctx.evil_C) {
     DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
@@ -3094,6 +3108,7 @@ void DRW_opengl_context_enable_ex(bool UNUSED(restore))
      * This shall remain in effect until immediate mode supports
      * multiple threads. */
     BLI_ticket_mutex_lock(DST.gl_context_mutex);
+    GPU_render_begin();
     WM_opengl_context_activate(DST.gl_context);
     GPU_context_active_set(DST.gpu_context);
   }
@@ -3105,7 +3120,9 @@ void DRW_opengl_context_disable_ex(bool restore)
 #ifdef __APPLE__
     /* Need to flush before disabling draw context, otherwise it does not
      * always finish drawing and viewport can be empty or partially drawn */
-    GPU_flush();
+    if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+      GPU_flush();
+    }
 #endif
 
     if (BLI_thread_is_main() && restore) {
@@ -3116,6 +3133,10 @@ void DRW_opengl_context_disable_ex(bool restore)
       GPU_context_active_set(NULL);
     }
 
+    /* Render boundaries are opened and closed here as this may be
+     * called outside of an existing render loop. */
+    GPU_render_end();
+
     BLI_ticket_mutex_unlock(DST.gl_context_mutex);
   }
 }
diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c
index b01c901c77f..2c9ebfc080e 100644
--- a/source/blender/draw/intern/draw_manager_data.c
+++ b/source/blender/draw/intern/draw_manager_data.c
@@ -498,9 +498,13 @@ void DRW_shgroup_uniform_vec4_array_copy(DRWShadingGroup *shgroup,
     return;
   }
 
+  /* Each array element stored as an individual entry in the uniform list.
+   * All entries from the same array share the same base location,
+   * and array-size used to determine the number of elements
+   * copied in draw_update_uniforms. */
   for (int i = 0; i < arraysize; i++) {
     drw_shgroup_uniform_create_ex(
-        shgroup, location + i, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, 1);
+        shgroup, location, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, arraysize);
   }
 }
 
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 7d6ce51ff35..3b5b06c94d4 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -584,21 +584,85 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
                                  DRWCommandsState *state,
                                  bool *use_tfeedback)
 {
+#define MAX_UNIFORM_STACK_SIZE 64
+
+  /* Uniform array elements stored as separate entries. We need to batch these together */
+  int current_uniform_array_loc = -1;
+  unsigned int current_array_index = 0;
+  static union {
+    int istack[MAX_UNIFORM_STACK_SIZE];
+    float fstack[MAX_UNIFORM_STACK_SIZE];
+  } uniform_stack;
+
+  /* Loop through uniforms. */
   for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) {
     DRWUniform *uni = unichunk->uniforms;
+
     for (int i = 0; i < unichunk->uniform_used; i++, uni++) {
+
+      /* For uniform array copies, copy per-array-element data into local buffer before upload. */
+      if (uni->arraysize > 1 &&
+          (uni->type == DRW_UNIFORM_INT_COPY || uni->type == DRW_UNIFORM_FLOAT_COPY)) {
+
+        /* Begin copying uniform array. */
+        if (current_array_index == 0) {
+          current_uniform_array_loc = uni->location;
+        }
+
+        /* Debug check same array loc. */
+        BLI_assert(current_uniform_array_loc > -1);
+        BLI_assert(current_uniform_array_loc == uni->location);
+
+        /* Copy array element data to local buffer. */
+        BLI_assert(((current_array_index + 1) * uni->length) <= MAX_UNIFORM_STACK_SIZE);
+        if (uni->type == DRW_UNIFORM_INT_COPY) {
+          memcpy(&uniform_stack.istack[current_array_index * uni->length],
+                 uni->ivalue,
+                 sizeof(int) * uni->length);
+        }
+        else {
+          memcpy(&uniform_stack.fstack[current_array_index * uni->length],
+                 uni->fvalue,
+                 sizeof(float) * uni->length);
+        }
+        current_array_index++;
+        BLI_assert(current_array_index <= uni->arraysize);
+
+        /* Flush array data to shader. */
+        if (current_array_index == uni->arraysize) {
+          if (uni->type == DRW_UNIFORM_INT_COPY) {
+            GPU_shader_uniform_vector_int(
+                shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.istack);
+          }
+          else {
+            GPU_shader_uniform_vector(
+                shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.fstack);
+          }
+          current_array_index = 0;
+          current_uniform_array_loc = -1;
+        }
+        continue;
+      }
+
+      /* Handle standard cases. */
       switch (uni->type) {
         case DRW_UNIFORM_INT_COPY:
-          GPU_shader_uniform_vector_int(
-              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue);
+          BLI_assert(uni->arraysize == 1);
+          if (uni->arraysize == 1) {
+            GPU_shader_uniform_vector_int(
+                shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue);
+          }
           break;
         case DRW_UNIFORM_INT:
           GPU_shader_uniform_vector_int(
               shgroup->shader, uni->location, uni->length, uni->arraysize, uni->pvalue);
           break;
         case DRW_UNIFORM_FLOAT_COPY:
-          GPU_shader_uniform_vector(
-              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue);
+          BLI_assert(uni->arraysize == 1);
+          if (uni->arraysize == 1) {
+            GPU_shader_uniform_vector(
+                shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue);
+          }
           break;
         case DRW_UNIFORM_FLOAT:
           GPU_shader_uniform_vector(
@@ -673,6 +737,9 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
       }
     }
   }
+  /* Ensure uniform arrays copied. */
+  BLI_assert(current_array_index == 0);
+  BLI_assert(current_uniform_array_loc == -1);
 }
 
 BLI_INLINE void draw_select_buffer(DRWShadingGroup *shgroup,
diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c
index 618d2f46e91..1936aa599ff 100644
--- a/source/blender/draw/intern/draw_manager_shader.c
+++ b/source/blender/draw/intern/draw_manager_shader.c
@@ -91,6 +91,7 @@ static void drw_deferred_shader_compilation_exec(
     short *do_update,
     float *progress)
 {
+  GPU_render_begin();
   DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
   void *gl_context = comp->gl_context;
   GPUContext *gpu_context = comp->gpu_context;
@@ -138,7 +139,9 @@ static void drw_deferred_shader_compilation_exec(
     *progress = (float)comp->shaders_done / (float)total;
     *do_update = true;
 
-    GPU_flush();
+    if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+      GPU_flush();
+    }
     BLI_mutex_unlock(&comp->compilation_lock);
 
     BLI_spin_lock(&comp->list_lock);
@@ -157,6 +160,7 @@ static void drw_deferred_shader_compilation_exec(
   if (use_main_context_workaround) {
     GPU_context_main_unlock();
   }
+  GPU_render_end();
 }
 
 static void drw_deferred_shader_compilation_free(void *custom_data)
-- 
cgit v1.2.3