diff options
author | Jason Fielder <jason_apple> | 2022-09-01 23:14:18 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2022-09-01 23:18:02 +0300 |
commit | ac07fb38a1b35fa156b2d0901eb35cd65ed73903 (patch) | |
tree | 2fe5a9b69c5c8bf04818e8b2cde0393e960a12ca /source/blender/draw/intern | |
parent | 5f4409b02ef7c54089ff1b491e008d4b86c030f4 (diff) |
Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.
- Adding in compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte; in Metal, this minimum stride is 4 bytes, meaning a vertex format must be at least 4 bytes in size.
- Replacing the transform feedback compile-time check with a runtime conditional look-up, given that TF is supported on macOS with Metal.
- 3D texture size safety check added as a general capability, rather than being in the gl backend only. Also required for Metal.
Authored by Apple: Michael Parkin-White
Ref T96261
Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D14510
Diffstat (limited to 'source/blender/draw/intern')
-rw-r--r-- | source/blender/draw/intern/draw_cache.c | 3 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_cache_impl_curves.cc | 22 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_cache_impl_particles.c | 30 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_curves.cc | 236 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_hair.cc | 237 | ||||
-rw-r--r-- | source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc | 6 |
6 files changed, 309 insertions, 225 deletions
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 4ff5745fc86..6537490c06c 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void) { if (SHC.drw_gpencil_dummy_quad == NULL) { GPUVertFormat format = {0}; - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT); + /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 4); diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc index 4f0072ec657..3bca17d9c56 100644 --- a/source/blender/draw/intern/draw_cache_impl_curves.cc +++ b/source/blender/draw/intern/draw_cache_impl_curves.cc @@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "pos"); - cache.proc_point_buf = GPU_vertbuf_create_with_format(&format); + cache.proc_point_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len); MutableSpan posTime_data{ @@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, GPUVertFormat length_format = {0}; GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache.proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len); MutableSpan hairLength_data{ @@ 
-319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac const char *name) { CurvesEvalFinalCache &final_cache = cache.final[subdiv]; - final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format, - GPU_USAGE_DEVICE_ONLY); + final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves, /* All attributes use vec4, see comment below. */ GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format); + cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index]; GPU_vertbuf_data_alloc(attr_vbo, @@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves, uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT); /* Curve Data. 
*/ - cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step); - cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step); @@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c index 02afbab6899..4fdc46ea18b 100644 --- a/source/blender/draw/intern/draw_cache_impl_particles.c +++ b/source/blender/draw/intern/draw_cache_impl_particles.c @@ -32,6 +32,8 @@ #include "ED_particle.h" #include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "DEG_depsgraph_query.h" @@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format); + /* Transform feedback buffer only needs to be resident in device memory. */ + GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC; + cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names)); /* Strand Data */ - cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step); - cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step); /* UV layers */ for (int i = 0; i < cache->num_uv_layers; i++) { - cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv); + cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]); @@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit /* Vertex colors */ for (int i = 0; i < cache->num_col_layers; i++) { - cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col); + cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]); @@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit, static GPUVertFormat format = {0}; GPU_vertformat_clear(&format); - 
/* initialize vertex format */ - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum + * stride requirement. */ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 1); @@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint pos_id = GPU_vertformat_attr_add( &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format); + cache->proc_point_buf = GPU_vertbuf_create_with_format_ex( + &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len); GPUVertBufRaw pos_step; @@ -1111,7 +1122,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint length_id = GPU_vertformat_attr_add( &length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache->proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len); GPUVertBufRaw length_step; diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc index 233af08c363..9c4181b0161 100644 --- a/source/blender/draw/intern/draw_curves.cc +++ b/source/blender/draw/intern/draw_curves.cc @@ -33,25 +33,17 @@ #include "draw_manager.h" #include "draw_shader.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return 
PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct CurvesEvalCall { struct CurvesEvalCall *next; GPUVertBuf *vbo; @@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data) CurvesUniformBufPool *pool = drw_data->curves_ubos; pool->reset(); -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); -#else - g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); + } + else { + g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, { GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); -#else - DRWShadingGroup *tf_shgrp = 
DRW_shgroup_create(tf_shader, g_tf_pass); - - CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); - pr_call->next = g_tf_calls; - pr_call->vbo = vbo; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); + pr_call->next = g_tf_calls; + pr_call->vbo = vbo; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -411,82 +407,118 @@ void DRW_curves_update() /* Update legacy hair too, to avoid verbosity in callers. */ DRW_hair_update(); -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. 
- */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = static_cast<float *>( + MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + CurvesEvalCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. 
*/ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = static_cast<float *>( - MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - CurvesEvalCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid + * side-effects from rendering in the active buffer. 
+ * We also need to guarantee that a Framebuffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the framebuffer is invalid we need a dummy framebuffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary framebuffer. 
*/ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing framebuffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_curves_free() diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc index dc791314333..69f123b95f3 100644 --- a/source/blender/draw/intern/draw_hair.cc +++ b/source/blender/draw/intern/draw_hair.cc @@ -22,6 +22,7 @@ #include "GPU_batch.h" #include "GPU_capabilities.h" #include "GPU_compute.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" @@ -33,25 +34,17 @@ #include "draw_shader.h" #include "draw_shader_shared.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; @@ -63,7 +56,6 @@ static ParticleRefineCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -77,18 +69,20 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) void DRW_hair_init(void) { -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); -#else - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); -#endif + if 
(GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); + } + else { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -146,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache if (final_points_len > 0) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + 
pr_call->vbo = cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -306,81 +303,117 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, void DRW_hair_update() { -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. 
*/ + uint max_size = 0; + for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + ParticleRefineCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. 
*/ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - ParticleRefineCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Framebuffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the framebuffer is invalid we need a dummy framebuffer to be bound. 
*/ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary framebuffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing framebuffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_hair_free(void) diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index eb6e800023a..50c37f6397c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -174,7 +174,7 @@ static void extract_edge_fac_finish(const MeshRenderData *mr, GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data); - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { /* Some AMD drivers strangely crash with VBO's with a one byte format. * To workaround we reinitialize the VBO with another format and convert * all bytes to floats. 
*/ @@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() { static GPUVertFormat format = {0}; if (format.attr_len == 0) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } else { @@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach uint offset = subdiv_cache->num_subdiv_loops; for (int i = 0; i < loose_geom.edge_len; i++) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { float loose_edge_fac[2] = {1.0f, 1.0f}; GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); } |