From 922d53a791d53b77e5ffcf65003555fae0a0e883 Mon Sep 17 00:00:00 2001 From: Jason Fielder Date: Wed, 30 Mar 2022 20:24:39 +0200 Subject: Metal: Adding alternative support for GPU_PRIM_TRI_FAN/LINE_LOOP For Metal backend. - Metal uniform array compatibility in DRW module. - Guard OpenGL-specific workarounds and flushes behind GPU_type_matches_ex API guard. Add further render boundaries for render paths called outside of the main loop. Authored by Apple: Michael Parkin-White Ref: T96261 Reviewed By: fclem Differential Revision: https://developer.blender.org/D14438 --- source/blender/draw/intern/draw_cache.c | 5 +- source/blender/draw/intern/draw_manager.c | 29 +++++++-- source/blender/draw/intern/draw_manager_data.c | 6 +- source/blender/draw/intern/draw_manager_exec.c | 75 ++++++++++++++++++++++-- source/blender/draw/intern/draw_manager_shader.c | 6 +- 5 files changed, 109 insertions(+), 12 deletions(-) (limited to 'source/blender/draw/intern') diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 8fc97ddcfc2..1c2a580e26d 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -26,6 +26,7 @@ #include "GPU_batch.h" #include "GPU_batch_utils.h" +#include "GPU_capabilities.h" #include "MEM_guardedalloc.h" @@ -395,12 +396,12 @@ GPUBatch *DRW_cache_quad_get(void) int v = 0; int flag = VCLASS_EMPTY_SCALED; - const float p[4][2] = {{-1.0f, -1.0f}, {-1.0f, 1.0f}, {1.0f, 1.0f}, {1.0f, -1.0f}}; + const float p[4][2] = {{-1.0f, 1.0f}, {1.0f, 1.0f}, {-1.0f, -1.0f}, {1.0f, -1.0f}}; for (int a = 0; a < 4; a++) { GPU_vertbuf_vert_set(vbo, v++, &(Vert){{p[a][0], p[a][1], 0.0f}, flag}); } - SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_FAN, vbo, NULL, GPU_BATCH_OWNS_VBO); + SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO); } return SHC.drw_quad; } diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index 39ae01697a1..75c27937f25 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -55,6 +55,7 @@ #include "GPU_framebuffer.h" #include "GPU_immediate.h" #include "GPU_matrix.h" +#include "GPU_platform.h" #include "GPU_shader_shared.h" #include "GPU_state.h" #include "GPU_uniform_buffer.h" @@ -1706,7 +1707,9 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, drw_engines_draw_scene(); /* Fix 3D view "lagging" on APPLE and WIN32+NVIDIA. (See T56996, T61474) */ - GPU_flush(); + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + GPU_flush(); + } DRW_stats_reset(); @@ -1938,6 +1941,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph) }; drw_context_state_init(); + /* Begin GPU workload Boundary */ + GPU_render_begin(); + const int size[2] = {engine->resolution_x, engine->resolution_y}; drw_manager_init(&DST, NULL, size); @@ -1993,6 +1999,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph) /* Reset state after drawing */ DRW_state_reset(); + + /* End GPU workload Boundary */ + GPU_render_end(); } void DRW_render_object_iter( @@ -2072,7 +2081,10 @@ void DRW_custom_pipeline(DrawEngineType *draw_engine_type, * resources as the main thread (viewport) may lead to data * races and undefined behavior on certain drivers. Using * GPU_finish to sync seems to fix the issue. (see T62997) */ - GPU_finish(); + eGPUBackendType type = GPU_backend_get_type(); + if (type == GPU_BACKEND_OPENGL) { + GPU_finish(); + } drw_manager_exit(&DST); } @@ -2173,7 +2185,9 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph, drw_engines_draw_scene(); /* Fix 3D view being "laggy" on macos and win+nvidia. (See T56996, T61474) */ - GPU_flush(); + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + GPU_flush(); + } if (DST.draw_ctx.evil_C) { DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); @@ -3094,6 +3108,7 @@ void DRW_opengl_context_enable_ex(bool UNUSED(restore)) * This shall remain in effect until immediate mode supports * multiple threads. */ BLI_ticket_mutex_lock(DST.gl_context_mutex); + GPU_render_begin(); WM_opengl_context_activate(DST.gl_context); GPU_context_active_set(DST.gpu_context); } @@ -3105,7 +3120,9 @@ void DRW_opengl_context_disable_ex(bool restore) #ifdef __APPLE__ /* Need to flush before disabling draw context, otherwise it does not * always finish drawing and viewport can be empty or partially drawn */ - GPU_flush(); + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + GPU_flush(); + } #endif if (BLI_thread_is_main() && restore) { @@ -3116,6 +3133,10 @@ void DRW_opengl_context_disable_ex(bool restore) GPU_context_active_set(NULL); } + /* Render boundaries are opened and closed here as this may be + * called outside of an existing render loop. */ + GPU_render_end(); + BLI_ticket_mutex_unlock(DST.gl_context_mutex); } } diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index b01c901c77f..2c9ebfc080e 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -498,9 +498,13 @@ void DRW_shgroup_uniform_vec4_array_copy(DRWShadingGroup *shgroup, return; } + /* Each array element stored as an individual entry in the uniform list. + * All entries from the same array share the same base location, + * and array-size used to determine the number of elements + * copied in draw_update_uniforms. */ for (int i = 0; i < arraysize; i++) { drw_shgroup_uniform_create_ex( - shgroup, location + i, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, 1); + shgroup, location, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, arraysize); } } diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index 7d6ce51ff35..3b5b06c94d4 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -584,21 +584,85 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup, DRWCommandsState *state, bool *use_tfeedback) { +#define MAX_UNIFORM_STACK_SIZE 64 + + /* Uniform array elements stored as separate entries. We need to batch these together */ + int current_uniform_array_loc = -1; + unsigned int current_array_index = 0; + static union { + int istack[MAX_UNIFORM_STACK_SIZE]; + float fstack[MAX_UNIFORM_STACK_SIZE]; + } uniform_stack; + + /* Loop through uniforms. */ for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) { DRWUniform *uni = unichunk->uniforms; + for (int i = 0; i < unichunk->uniform_used; i++, uni++) { + + /* For uniform array copies, copy per-array-element data into local buffer before upload. */ + if (uni->arraysize > 1 && + (uni->type == DRW_UNIFORM_INT_COPY || uni->type == DRW_UNIFORM_FLOAT_COPY)) { + + /* Begin copying uniform array. */ + if (current_array_index == 0) { + current_uniform_array_loc = uni->location; + } + + /* Debug check same array loc. */ + BLI_assert(current_uniform_array_loc > -1); + BLI_assert(current_uniform_array_loc == uni->location); + + /* Copy array element data to local buffer. */ + BLI_assert(((current_array_index + 1) * uni->length) <= MAX_UNIFORM_STACK_SIZE); + if (uni->type == DRW_UNIFORM_INT_COPY) { + memcpy(&uniform_stack.istack[current_array_index * uni->length], + uni->ivalue, + sizeof(int) * uni->length); + } + else { + memcpy(&uniform_stack.fstack[current_array_index * uni->length], + uni->fvalue, + sizeof(float) * uni->length); + } + current_array_index++; + BLI_assert(current_array_index <= uni->arraysize); + + /* Flush array data to shader. */ + if (current_array_index == uni->arraysize) { + if (uni->type == DRW_UNIFORM_INT_COPY) { + GPU_shader_uniform_vector_int( + shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.istack); + } + else { + GPU_shader_uniform_vector( + shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.fstack); + } + current_array_index = 0; + current_uniform_array_loc = -1; + } + continue; + } + + /* Handle standard cases. */ switch (uni->type) { case DRW_UNIFORM_INT_COPY: - GPU_shader_uniform_vector_int( - shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue); + BLI_assert(uni->arraysize == 1); + if (uni->arraysize == 1) { + GPU_shader_uniform_vector_int( + shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue); + } break; case DRW_UNIFORM_INT: GPU_shader_uniform_vector_int( shgroup->shader, uni->location, uni->length, uni->arraysize, uni->pvalue); break; case DRW_UNIFORM_FLOAT_COPY: - GPU_shader_uniform_vector( - shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue); + BLI_assert(uni->arraysize == 1); + if (uni->arraysize == 1) { + GPU_shader_uniform_vector( + shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue); + } break; case DRW_UNIFORM_FLOAT: GPU_shader_uniform_vector( @@ -673,6 +737,9 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup, } } } + /* Ensure uniform arrays copied. */ + BLI_assert(current_array_index == 0); + BLI_assert(current_uniform_array_loc == -1); } BLI_INLINE void draw_select_buffer(DRWShadingGroup *shgroup, diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 618d2f46e91..1936aa599ff 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -91,6 +91,7 @@ static void drw_deferred_shader_compilation_exec( short *do_update, float *progress) { + GPU_render_begin(); DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data; void *gl_context = comp->gl_context; GPUContext *gpu_context = comp->gpu_context; @@ -138,7 +139,9 @@ static void drw_deferred_shader_compilation_exec( *progress = (float)comp->shaders_done / (float)total; *do_update = true; - GPU_flush(); + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + GPU_flush(); + } BLI_mutex_unlock(&comp->compilation_lock); BLI_spin_lock(&comp->list_lock); @@ -157,6 +160,7 @@ static void drw_deferred_shader_compilation_exec( if (use_main_context_workaround) { GPU_context_main_unlock(); } + GPU_render_end(); } static void drw_deferred_shader_compilation_free(void *custom_data) -- cgit v1.2.3