diff options
author | Jason Fielder <jason_apple> | 2022-09-01 23:14:18 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2022-09-01 23:18:02 +0300 |
commit | ac07fb38a1b35fa156b2d0901eb35cd65ed73903 (patch) | |
tree | 2fe5a9b69c5c8bf04818e8b2cde0393e960a12ca /source/blender | |
parent | 5f4409b02ef7c54089ff1b491e008d4b86c030f4 (diff) |
Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.
- Adding in compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte, in Metal, this minimum stride is 4 bytes. Meaing a vertex format must be atleast 4-bytes in size.
- Replacing transform feedback compile-time check to conditional look-up, given TF is supported on macOS with Metal.
- 3D texture size safety check added as a general capability, rather than being in the gl backend only. Also required for Metal.
Authored by Apple: Michael Parkin-White
Ref T96261
Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D14510
Diffstat (limited to 'source/blender')
18 files changed, 413 insertions, 240 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c index 533e71b9b32..2d96cffb4ba 100644 --- a/source/blender/draw/engines/eevee/eevee_volumes.c +++ b/source/blender/draw/engines/eevee/eevee_volumes.c @@ -30,6 +30,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_texture.h" #include "eevee_private.h" @@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size)); tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1); + /* Clamp 3D texture size based on device maximum. */ + int maxSize = GPU_max_texture_3d_size(); + BLI_assert(tex_size[0] <= maxSize); + tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0]; + tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1]; + tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2]; + common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]); common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]); common_data->vol_coord_scale[2] = 1.0f / viewport_size[0]; diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 4ff5745fc86..6537490c06c 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void) { if (SHC.drw_gpencil_dummy_quad == NULL) { GPUVertFormat format = {0}; - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT); + /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 4); diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc index 4f0072ec657..3bca17d9c56 100644 --- a/source/blender/draw/intern/draw_cache_impl_curves.cc +++ b/source/blender/draw/intern/draw_cache_impl_curves.cc @@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "pos"); - cache.proc_point_buf = GPU_vertbuf_create_with_format(&format); + cache.proc_point_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len); MutableSpan posTime_data{ @@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, GPUVertFormat length_format = {0}; GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache.proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len); MutableSpan hairLength_data{ @@ -319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac const char *name) { CurvesEvalFinalCache &final_cache = cache.final[subdiv]; - final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format, - GPU_USAGE_DEVICE_ONLY); + final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves, /* All attributes use vec4, see comment below. */ GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format); + cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index]; GPU_vertbuf_data_alloc(attr_vbo, @@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves, uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT); /* Curve Data. */ - cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step); - cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step); @@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c index 02afbab6899..4fdc46ea18b 100644 --- a/source/blender/draw/intern/draw_cache_impl_particles.c +++ b/source/blender/draw/intern/draw_cache_impl_particles.c @@ -32,6 +32,8 @@ #include "ED_particle.h" #include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "DEG_depsgraph_query.h" @@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format); + /* Transform feedback buffer only needs to be resident in device memory. */ + GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC; + cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names)); /* Strand Data */ - cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step); - cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step); /* UV layers */ for (int i = 0; i < cache->num_uv_layers; i++) { - cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv); + cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]); @@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit /* Vertex colors */ for (int i = 0; i < cache->num_col_layers; i++) { - cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col); + cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]); @@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit, static GPUVertFormat format = {0}; GPU_vertformat_clear(&format); - /* initialize vertex format */ - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum + * stride requirement. */ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 1); @@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint pos_id = GPU_vertformat_attr_add( &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format); + cache->proc_point_buf = GPU_vertbuf_create_with_format_ex( + &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len); GPUVertBufRaw pos_step; @@ -1111,7 +1122,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint length_id = GPU_vertformat_attr_add( &length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache->proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len); GPUVertBufRaw length_step; diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc index 233af08c363..9c4181b0161 100644 --- a/source/blender/draw/intern/draw_curves.cc +++ b/source/blender/draw/intern/draw_curves.cc @@ -33,25 +33,17 @@ #include "draw_manager.h" #include "draw_shader.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct CurvesEvalCall { struct CurvesEvalCall *next; GPUVertBuf *vbo; @@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data) CurvesUniformBufPool *pool = drw_data->curves_ubos; pool->reset(); -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); -#else - g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); + } + else { + g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, { GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); - pr_call->next = g_tf_calls; - pr_call->vbo = vbo; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); + pr_call->next = g_tf_calls; + pr_call->vbo = vbo; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -411,82 +407,118 @@ void DRW_curves_update() /* Update legacy hair too, to avoid verbosity in callers. */ DRW_hair_update(); -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = static_cast<float *>( + MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + CurvesEvalCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = static_cast<float *>( - MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - CurvesEvalCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Framebuffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the framebuffer is invalid we need a dummy framebuffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary framebuffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing framebuffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_curves_free() diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc index dc791314333..69f123b95f3 100644 --- a/source/blender/draw/intern/draw_hair.cc +++ b/source/blender/draw/intern/draw_hair.cc @@ -22,6 +22,7 @@ #include "GPU_batch.h" #include "GPU_capabilities.h" #include "GPU_compute.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" @@ -33,25 +34,17 @@ #include "draw_shader.h" #include "draw_shader_shared.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; @@ -63,7 +56,6 @@ static ParticleRefineCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -77,18 +69,20 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) void DRW_hair_init(void) { -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); -#else - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); + } + else { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -146,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache if (final_points_len > 0) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + pr_call->vbo = cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -306,81 +303,117 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, void DRW_hair_update() { -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + ParticleRefineCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - ParticleRefineCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Framebuffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the framebuffer is invalid we need a dummy framebuffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary framebuffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing framebuffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_hair_free(void) diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index eb6e800023a..50c37f6397c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -174,7 +174,7 @@ static void extract_edge_fac_finish(const MeshRenderData *mr, GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data); - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { /* Some AMD drivers strangely crash with VBO's with a one byte format. * To workaround we reinitialize the VBO with another format and convert * all bytes to floats. */ @@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() { static GPUVertFormat format = {0}; if (format.attr_len == 0) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } else { @@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach uint offset = subdiv_cache->num_subdiv_loops; for (int i = 0; i < loose_geom.edge_len; i++) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { float loose_edge_fac[2] = {1.0f, 1.0f}; GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); } diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h index aa01f446b9b..61c60f336e1 100644 --- a/source/blender/gpu/GPU_capabilities.h +++ b/source/blender/gpu/GPU_capabilities.h @@ -16,6 +16,7 @@ extern "C" { #endif int GPU_max_texture_size(void); +int GPU_max_texture_3d_size(void); int GPU_max_texture_layers(void); int GPU_max_textures(void); int GPU_max_textures_vert(void); @@ -31,6 +32,7 @@ int GPU_max_vertex_attribs(void); int GPU_max_varying_floats(void); int GPU_max_shader_storage_buffer_bindings(void); int GPU_max_compute_shader_storage_blocks(void); +int GPU_max_samplers(void); int GPU_extensions_len(void); const char *GPU_extension_get(int i); @@ -57,6 +59,9 @@ void GPU_mem_stats_get(int *totalmem, int *freemem); */ bool GPU_stereo_quadbuffer_support(void); +int GPU_minimum_per_vertex_stride(void); +bool GPU_transform_feedback_support(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index 722ef878271..d3c1bd8145d 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -40,12 +40,20 @@ extern "C" { typedef enum { /* can be extended to support more types */ - GPU_USAGE_STREAM, - GPU_USAGE_STATIC, /* do not keep data in memory */ - GPU_USAGE_DYNAMIC, - GPU_USAGE_DEVICE_ONLY, /* Do not do host->device data transfers. */ + GPU_USAGE_STREAM = 0, + GPU_USAGE_STATIC = 1, /* do not keep data in memory */ + GPU_USAGE_DYNAMIC = 2, + GPU_USAGE_DEVICE_ONLY = 3, /* Do not do host->device data transfers. */ + + /** Extended usage flags. */ + /* Flag for vertex buffers used for textures. Skips additional padding/compaction to ensure + * format matches the texture exactly. Can be masked with other properties, and is stripped + * during VertBuf::init. */ + GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY = 1 << 3, } GPUUsageType; +ENUM_OPERATORS(GPUUsageType, GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); + /** Opaque type hiding blender::gpu::VertBuf. */ typedef struct GPUVertBuf GPUVertBuf; diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index 73f94ecfb1b..e584b757a05 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -33,6 +33,11 @@ int GPU_max_texture_size() return GCaps.max_texture_size; } +int GPU_max_texture_3d_size(void) +{ + return GCaps.max_texture_3d_size; +} + int GPU_texture_size_with_limit(int res) { int size = GPU_max_texture_size(); @@ -115,6 +120,11 @@ const char *GPU_extension_get(int i) return GCaps.extension_get ? GCaps.extension_get(i) : "\0"; } +int GPU_max_samplers() +{ + return GCaps.max_samplers; +} + bool GPU_mip_render_workaround() { return GCaps.mip_render_workaround; @@ -176,6 +186,16 @@ int GPU_max_compute_shader_storage_blocks() return GCaps.max_compute_shader_storage_blocks; } +int GPU_minimum_per_vertex_stride(void) +{ + return GCaps.minimum_per_vertex_stride; +} + +bool GPU_transform_feedback_support(void) +{ + return GCaps.transform_feedback_support; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc index 65daa416cae..bec8b8a0df3 100644 --- a/source/blender/gpu/intern/gpu_texture.cc +++ b/source/blender/gpu/intern/gpu_texture.cc @@ -360,6 +360,13 @@ GPUTexture *GPU_texture_create_compressed_2d( GPUTexture *GPU_texture_create_from_vertbuf(const char *name, GPUVertBuf *vert) { +#ifndef NDEBUG + /* Vertex buffers used for texture buffers must be flagged with: + * GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY. */ + BLI_assert_msg(unwrap(vert)->extended_usage_ & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY, + "Vertex Buffers used for textures should have usage flag " + "GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY."); +#endif eGPUTextureFormat tex_format = to_texture_format(GPU_vertbuf_get_format(vert)); Texture *tex = GPUBackend::get()->texture_alloc(name); diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 0dbd565291b..a441cfe2fb8 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -40,10 +40,21 @@ VertBuf::~VertBuf() void VertBuf::init(const GPUVertFormat *format, GPUUsageType usage) { - usage_ = usage; + /* Strip extended usage flags. */ + usage_ = usage & ~GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY; +#ifndef NDEBUG + /* Store extended usage. */ + extended_usage_ = usage; +#endif flag = GPU_VERTBUF_DATA_DIRTY; GPU_vertformat_copy(&this->format, format); - if (!format->packed) { + /* Avoid packing vertex formats which are used for texture buffers. + * These cases use singular types and do not need packing. They must + * also not have increased alignment padding to the minimum per-vertex stride. */ + if (usage & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY) { + VertexFormat_texture_buffer_pack(&this->format); + } + if (!this->format.packed) { VertexFormat_pack(&this->format); } flag |= GPU_VERTBUF_INIT; @@ -62,6 +73,10 @@ VertBuf *VertBuf::duplicate() *dst = *this; /* Almost full copy... */ dst->handle_refcount_ = 1; + /* Metadata. */ +#ifndef NDEBUG + dst->extended_usage_ = extended_usage_; +#endif /* Duplicate all needed implementation specifics data. */ this->duplicate_data(dst); return dst; @@ -192,6 +207,7 @@ void GPU_vertbuf_data_len_set(GPUVertBuf *verts_, uint v_len) void GPU_vertbuf_attr_set(GPUVertBuf *verts_, uint a_idx, uint v_idx, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; const GPUVertAttr *a = &format->attrs[a_idx]; BLI_assert(v_idx < verts->vertex_alloc); @@ -215,6 +231,7 @@ void GPU_vertbuf_attr_fill(GPUVertBuf *verts_, uint a_idx, const void *data) void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; BLI_assert(v_idx < verts->vertex_alloc); BLI_assert(verts->data != nullptr); @@ -225,6 +242,7 @@ void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data) void GPU_vertbuf_attr_fill_stride(GPUVertBuf *verts_, uint a_idx, uint stride, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; const GPUVertAttr *a = &format->attrs[a_idx]; BLI_assert(a_idx < format->attr_len); diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index a7920bacaec..f20f6caf6de 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -31,6 +31,11 @@ class VertBuf { /** NULL indicates data in VRAM (unmapped) */ uchar *data = nullptr; +#ifndef NDEBUG + /** Usage including extended usage flags. */ + GPUUsageType extended_usage_ = GPU_USAGE_STATIC; +#endif + protected: /** Usage hint for GL optimization. */ GPUUsageType usage_ = GPU_USAGE_STATIC; @@ -83,6 +88,11 @@ class VertBuf { } } + GPUUsageType get_usage_type() const + { + return usage_; + } + virtual void update_sub(uint start, uint len, const void *data) = 0; virtual const void *read() const = 0; virtual void *unmap(const void *mapped_data) const = 0; diff --git a/source/blender/gpu/intern/gpu_vertex_format.cc b/source/blender/gpu/intern/gpu_vertex_format.cc index c16c06c1421..5c21bc313eb 100644 --- a/source/blender/gpu/intern/gpu_vertex_format.cc +++ b/source/blender/gpu/intern/gpu_vertex_format.cc @@ -8,6 +8,8 @@ */ #include "GPU_vertex_format.h" +#include "GPU_capabilities.h" + #include "gpu_shader_create_info.hh" #include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" @@ -68,7 +70,7 @@ static uint attr_size(const GPUVertAttr *a) return a->comp_len * comp_size(static_cast<GPUVertCompType>(a->comp_type)); } -static uint attr_align(const GPUVertAttr *a) +static uint attr_align(const GPUVertAttr *a, uint minimum_stride) { if (a->comp_type == GPU_COMP_I10) { return 4; /* always packed as 10_10_10_2 */ @@ -78,7 +80,10 @@ static uint attr_align(const GPUVertAttr *a) return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */ } - return c; /* most fetches are ok if components are naturally aligned */ + /* Most fetches are ok if components are naturally aligned. + * However, in Metal,the minimum supported per-vertex stride is 4, + * so we must query the GPU and pad out the size accordingly. */ + return max_ii(minimum_stride, c); } uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len) @@ -308,7 +313,7 @@ static void show_pack(uint a_idx, uint size, uint pad) } #endif -void VertexFormat_pack(GPUVertFormat *format) +static void VertexFormat_pack_impl(GPUVertFormat *format, uint minimum_stride) { GPUVertAttr *a0 = &format->attrs[0]; a0->offset = 0; @@ -320,7 +325,7 @@ void VertexFormat_pack(GPUVertFormat *format) for (uint a_idx = 1; a_idx < format->attr_len; a_idx++) { GPUVertAttr *a = &format->attrs[a_idx]; - uint mid_padding = padding(offset, attr_align(a)); + uint mid_padding = padding(offset, attr_align(a, minimum_stride)); offset += mid_padding; a->offset = offset; offset += a->size; @@ -330,7 +335,7 @@ void VertexFormat_pack(GPUVertFormat *format) #endif } - uint end_padding = padding(offset, attr_align(a0)); + uint end_padding = padding(offset, attr_align(a0, minimum_stride)); #if PACK_DEBUG show_pack(0, 0, end_padding); diff --git a/source/blender/gpu/intern/gpu_vertex_format_private.h b/source/blender/gpu/intern/gpu_vertex_format_private.h index 0f8a869f6df..430008b4cb9 100644 --- a/source/blender/gpu/intern/gpu_vertex_format_private.h +++ b/source/blender/gpu/intern/gpu_vertex_format_private.h @@ -16,6 +16,7 @@ extern "C" { struct GPUVertFormat; void VertexFormat_pack(struct GPUVertFormat *format); +void VertexFormat_texture_buffer_pack(struct GPUVertFormat *format); uint padding(uint offset, uint alignment); uint vertex_buffer_size(const struct GPUVertFormat *format, uint vertex_len); diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index 24ca8c25bc0..2375e78d9f1 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -386,6 +386,11 @@ static void detect_workarounds() } } + /* Disable TF on macOS. */ + if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) { + GCaps.transform_feedback_support = false; + } + /* Some Intel drivers have issues with using mips as frame-buffer targets if * GL_TEXTURE_MAX_LEVEL is higher than the target MIP. * Only check at the end after all other workarounds because this uses the drawing code. @@ -431,7 +436,6 @@ static void detect_workarounds() /** Internal capabilities. */ GLint GLContext::max_cubemap_size = 0; -GLint GLContext::max_texture_3d_size = 0; GLint GLContext::max_ubo_binds = 0; GLint GLContext::max_ubo_size = 0; GLint GLContext::max_ssbo_binds = 0; @@ -499,6 +503,8 @@ void GLBackend::capabilities_init() GCaps.shader_draw_parameters_support = epoxy_has_gl_extension("GL_ARB_shader_draw_parameters"); GCaps.compute_shader_support = epoxy_has_gl_extension("GL_ARB_compute_shader") && epoxy_gl_version() >= 43; + GCaps.max_samplers = GCaps.max_textures; + if (GCaps.compute_shader_support) { glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &GCaps.max_work_group_count[0]); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &GCaps.max_work_group_count[1]); @@ -512,8 +518,10 @@ void GLBackend::capabilities_init() } GCaps.shader_storage_buffer_objects_support = epoxy_has_gl_extension( "GL_ARB_shader_storage_buffer_object"); + GCaps.transform_feedback_support = true; + /* GL specific capabilities. */ - glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size); + glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GCaps.max_texture_3d_size); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size); glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_BLOCKS, &GLContext::max_ubo_binds); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &GLContext::max_ubo_size); diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 2f8c2b762f8..1d413750fd4 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -40,7 +40,6 @@ class GLContext : public Context { /** Capabilities. */ static GLint max_cubemap_size; - static GLint max_texture_3d_size; static GLint max_ubo_size; static GLint max_ubo_binds; static GLint max_ssbo_size; diff --git a/source/blender/gpu/opengl/gl_texture.cc b/source/blender/gpu/opengl/gl_texture.cc index 2ce205353a3..02fc7dfdb5f 100644 --- a/source/blender/gpu/opengl/gl_texture.cc +++ b/source/blender/gpu/opengl/gl_texture.cc @@ -603,7 +603,7 @@ bool GLTexture::proxy_check(int mip) { /* Manual validation first, since some implementation have issues with proxy creation. */ int max_size = GPU_max_texture_size(); - int max_3d_size = GLContext::max_texture_3d_size; + int max_3d_size = GPU_max_texture_3d_size(); int max_cube_size = GLContext::max_cubemap_size; int size[3] = {1, 1, 1}; this->mip_size_get(mip, size); |