Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Fielder <jason_apple>2022-09-01 23:14:18 +0300
committerClément Foucault <foucault.clem@gmail.com>2022-09-01 23:18:02 +0300
commitac07fb38a1b35fa156b2d0901eb35cd65ed73903 (patch)
tree2fe5a9b69c5c8bf04818e8b2cde0393e960a12ca /source/blender/draw
parent5f4409b02ef7c54089ff1b491e008d4b86c030f4 (diff)
Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.
- Adding in compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte, in Metal, this minimum stride is 4 bytes. Meaing a vertex format must be atleast 4-bytes in size. - Replacing transform feedback compile-time check to conditional look-up, given TF is supported on macOS with Metal. - 3D texture size safety check added as a general capability, rather than being in the gl backend only. Also required for Metal. Authored by Apple: Michael Parkin-White Ref T96261 Reviewed By: fclem Maniphest Tasks: T96261 Differential Revision: https://developer.blender.org/D14510
Diffstat (limited to 'source/blender/draw')
-rw-r--r--source/blender/draw/engines/eevee/eevee_volumes.c8
-rw-r--r--source/blender/draw/intern/draw_cache.c3
-rw-r--r--source/blender/draw/intern/draw_cache_impl_curves.cc22
-rw-r--r--source/blender/draw/intern/draw_cache_impl_particles.c30
-rw-r--r--source/blender/draw/intern/draw_curves.cc236
-rw-r--r--source/blender/draw/intern/draw_hair.cc237
-rw-r--r--source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc6
7 files changed, 317 insertions, 225 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c
index 533e71b9b32..2d96cffb4ba 100644
--- a/source/blender/draw/engines/eevee/eevee_volumes.c
+++ b/source/blender/draw/engines/eevee/eevee_volumes.c
@@ -30,6 +30,7 @@
#include "DEG_depsgraph_query.h"
#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_texture.h"
#include "eevee_private.h"
@@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size));
tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1);
+ /* Clamp 3D texture size based on device maximum. */
+ int maxSize = GPU_max_texture_3d_size();
+ BLI_assert(tex_size[0] <= maxSize);
+ tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0];
+ tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1];
+ tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2];
+
common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]);
common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]);
common_data->vol_coord_scale[2] = 1.0f / viewport_size[0];
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 4ff5745fc86..6537490c06c 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void)
{
if (SHC.drw_gpencil_dummy_quad == NULL) {
GPUVertFormat format = {0};
- GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT);
+ /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */
+ GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT);
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 4);
diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc
index 4f0072ec657..3bca17d9c56 100644
--- a/source/blender/draw/intern/draw_cache_impl_curves.cc
+++ b/source/blender/draw/intern/draw_cache_impl_curves.cc
@@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
GPU_vertformat_alias_add(&format, "pos");
- cache.proc_point_buf = GPU_vertbuf_create_with_format(&format);
+ cache.proc_point_buf = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len);
MutableSpan posTime_data{
@@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPUVertFormat length_format = {0};
GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
- cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
+ cache.proc_length_buf = GPU_vertbuf_create_with_format_ex(
+ &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len);
MutableSpan hairLength_data{
@@ -319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac
const char *name)
{
CurvesEvalFinalCache &final_cache = cache.final[subdiv];
- final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format,
- GPU_USAGE_DEVICE_ONLY);
+ final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+ format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves,
/* All attributes use vec4, see comment below. */
GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format);
+ cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index];
GPU_vertbuf_data_alloc(attr_vbo,
@@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves,
uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT);
/* Curve Data. */
- cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+ cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+ &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step);
- cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+ cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+ &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step);
@@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY);
+ cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c
index 02afbab6899..4fdc46ea18b 100644
--- a/source/blender/draw/intern/draw_cache_impl_particles.c
+++ b/source/blender/draw/intern/draw_cache_impl_particles.c
@@ -32,6 +32,8 @@
#include "ED_particle.h"
#include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "DEG_depsgraph_query.h"
@@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format);
+ /* Transform feedback buffer only needs to be resident in device memory. */
+ GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC;
+ cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+ &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names));
/* Strand Data */
- cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+ cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+ &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step);
- cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+ cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+ &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step);
/* UV layers */
for (int i = 0; i < cache->num_uv_layers; i++) {
- cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv);
+ cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex(
+ &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]);
@@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
/* Vertex colors */
for (int i = 0; i < cache->num_col_layers; i++) {
- cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col);
+ cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex(
+ &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]);
@@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit,
static GPUVertFormat format = {0};
GPU_vertformat_clear(&format);
- /* initialize vertex format */
- GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
+ /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum
+ * stride requirement. */
+ GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 1);
@@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
uint pos_id = GPU_vertformat_attr_add(
&pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format);
+ cache->proc_point_buf = GPU_vertbuf_create_with_format_ex(
+ &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len);
GPUVertBufRaw pos_step;
@@ -1111,7 +1122,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
uint length_id = GPU_vertformat_attr_add(
&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
- cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
+ cache->proc_length_buf = GPU_vertbuf_create_with_format_ex(
+ &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len);
GPUVertBufRaw length_step;
diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc
index 233af08c363..9c4181b0161 100644
--- a/source/blender/draw/intern/draw_curves.cc
+++ b/source/blender/draw/intern/draw_curves.cc
@@ -33,25 +33,17 @@
#include "draw_manager.h"
#include "draw_shader.h"
-#ifndef __APPLE__
-# define USE_TRANSFORM_FEEDBACK
-# define USE_COMPUTE_SHADERS
-#endif
-
BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
{
-#ifdef USE_COMPUTE_SHADERS
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
return PART_REFINE_SHADER_COMPUTE;
}
-#endif
-#ifdef USE_TRANSFORM_FEEDBACK
- return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
-#endif
+ if (GPU_transform_feedback_support()) {
+ return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
+ }
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
}
-#ifndef USE_TRANSFORM_FEEDBACK
struct CurvesEvalCall {
struct CurvesEvalCall *next;
GPUVertBuf *vbo;
@@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr;
static int g_tf_id_offset;
static int g_tf_target_width;
static int g_tf_target_height;
-#endif
static GPUVertBuf *g_dummy_vbo = nullptr;
static GPUTexture *g_dummy_texture = nullptr;
@@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data)
CurvesUniformBufPool *pool = drw_data->curves_ubos;
pool->reset();
-#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS)
- g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0);
-#else
- g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR);
-#endif
+ if (GPU_transform_feedback_support() || GPU_compute_shader_support()) {
+ g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0);
+ }
+ else {
+ g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR);
+ }
if (g_dummy_vbo == nullptr) {
/* initialize vertex format */
GPUVertFormat format = {0};
uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- g_dummy_vbo = GPU_vertbuf_create_with_format(&format);
+ g_dummy_vbo = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
@@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache,
{
GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM);
-#ifdef USE_TRANSFORM_FEEDBACK
- DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo);
-#else
- DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
-
- CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__);
- pr_call->next = g_tf_calls;
- pr_call->vbo = vbo;
- pr_call->shgrp = tf_shgrp;
- pr_call->vert_len = final_points_len;
- g_tf_calls = pr_call;
- DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
- DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
- DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
-#endif
+ DRWShadingGroup *tf_shgrp = nullptr;
+ if (GPU_transform_feedback_support()) {
+ tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo);
+ }
+ else {
+ tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
+
+ CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__);
+ pr_call->next = g_tf_calls;
+ pr_call->vbo = vbo;
+ pr_call->shgrp = tf_shgrp;
+ pr_call->vert_len = final_points_len;
+ g_tf_calls = pr_call;
+ DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
+ DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
+ DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
+ }
+ BLI_assert(tf_shgrp != nullptr);
drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv);
DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
@@ -411,82 +407,118 @@ void DRW_curves_update()
/* Update legacy hair too, to avoid verbosity in callers. */
DRW_hair_update();
-#ifndef USE_TRANSFORM_FEEDBACK
- /**
- * Workaround to transform feedback not working on mac.
- * On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
- *
- * So instead of using transform feedback we render to a texture,
- * read back the result to system memory and re-upload as VBO data.
- * It is really not ideal performance wise, but it is the simplest
- * and the most local workaround that still uses the power of the GPU.
- */
-
- if (g_tf_calls == nullptr) {
- return;
- }
+ if (!GPU_transform_feedback_support()) {
+ /**
+ * Workaround to transform feedback not working on mac.
+ * On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
+ *
+ * So instead of using transform feedback we render to a texture,
+ * read back the result to system memory and re-upload as VBO data.
+ * It is really not ideal performance wise, but it is the simplest
+ * and the most local workaround that still uses the power of the GPU.
+ */
+
+ if (g_tf_calls == nullptr) {
+ return;
+ }
- /* Search ideal buffer size. */
- uint max_size = 0;
- for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
- max_size = max_ii(max_size, pr_call->vert_len);
- }
+ /* Search ideal buffer size. */
+ uint max_size = 0;
+ for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
+ max_size = max_ii(max_size, pr_call->vert_len);
+ }
+
+ /* Create target Texture / Frame-buffer */
+ /* Don't use max size as it can be really heavy and fail.
+ * Do chunks of maximum 2048 * 2048 hair points. */
+ int width = 2048;
+ int height = min_ii(width, 1 + max_size / width);
+ GPUTexture *tex = DRW_texture_pool_query_2d(
+ width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update);
+ g_tf_target_height = height;
+ g_tf_target_width = width;
+
+ GPUFrameBuffer *fb = nullptr;
+ GPU_framebuffer_ensure_config(&fb,
+ {
+ GPU_ATTACHMENT_NONE,
+ GPU_ATTACHMENT_TEXTURE(tex),
+ });
+
+ float *data = static_cast<float *>(
+ MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"));
+
+ GPU_framebuffer_bind(fb);
+ while (g_tf_calls != nullptr) {
+ CurvesEvalCall *pr_call = g_tf_calls;
+ g_tf_calls = g_tf_calls->next;
+
+ g_tf_id_offset = 0;
+ while (pr_call->vert_len > 0) {
+ int max_read_px_len = min_ii(width * height, pr_call->vert_len);
+
+ DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
+ /* Read back result to main memory. */
+ GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
+ /* Upload back to VBO. */
+ GPU_vertbuf_use(pr_call->vbo);
+ GPU_vertbuf_update_sub(pr_call->vbo,
+ sizeof(float[4]) * g_tf_id_offset,
+ sizeof(float[4]) * max_read_px_len,
+ data);
+
+ g_tf_id_offset += max_read_px_len;
+ pr_call->vert_len -= max_read_px_len;
+ }
- /* Create target Texture / Frame-buffer */
- /* Don't use max size as it can be really heavy and fail.
- * Do chunks of maximum 2048 * 2048 hair points. */
- int width = 2048;
- int height = min_ii(width, 1 + max_size / width);
- GPUTexture *tex = DRW_texture_pool_query_2d(
- width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update);
- g_tf_target_height = height;
- g_tf_target_width = width;
-
- GPUFrameBuffer *fb = nullptr;
- GPU_framebuffer_ensure_config(&fb,
- {
- GPU_ATTACHMENT_NONE,
- GPU_ATTACHMENT_TEXTURE(tex),
- });
-
- float *data = static_cast<float *>(
- MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"));
-
- GPU_framebuffer_bind(fb);
- while (g_tf_calls != nullptr) {
- CurvesEvalCall *pr_call = g_tf_calls;
- g_tf_calls = g_tf_calls->next;
-
- g_tf_id_offset = 0;
- while (pr_call->vert_len > 0) {
- int max_read_px_len = min_ii(width * height, pr_call->vert_len);
-
- DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
- /* Read back result to main memory. */
- GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
- /* Upload back to VBO. */
- GPU_vertbuf_use(pr_call->vbo);
- GPU_vertbuf_update_sub(pr_call->vbo,
- sizeof(float[4]) * g_tf_id_offset,
- sizeof(float[4]) * max_read_px_len,
- data);
-
- g_tf_id_offset += max_read_px_len;
- pr_call->vert_len -= max_read_px_len;
+ MEM_freeN(pr_call);
}
- MEM_freeN(pr_call);
+ MEM_freeN(data);
+ GPU_framebuffer_free(fb);
}
+ else {
+ /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid
+ * side-effects from rendering in the active buffer.
+ * We also need to guarantee that a Framebuffer is active to perform any rendering work,
+ * even if there is no output */
+ GPUFrameBuffer *temp_fb = nullptr;
+ GPUFrameBuffer *prev_fb = nullptr;
+ if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
+ if (!GPU_compute_shader_support()) {
+ prev_fb = GPU_framebuffer_active_get();
+ char errorOut[256];
+ /* if the framebuffer is invalid we need a dummy framebuffer to be bound. */
+ if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) {
+ int width = 64;
+ int height = 64;
+ GPUTexture *tex = DRW_texture_pool_query_2d(
+ width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update);
+ g_tf_target_height = height;
+ g_tf_target_width = width;
+
+ GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)});
+
+ GPU_framebuffer_bind(temp_fb);
+ }
+ }
+ }
- MEM_freeN(data);
- GPU_framebuffer_free(fb);
-#else
- /* Just render the pass when using compute shaders or transform feedback. */
- DRW_draw_pass(g_tf_pass);
- if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
- GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+ /* Just render the pass when using compute shaders or transform feedback. */
+ DRW_draw_pass(g_tf_pass);
+ if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
+ GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+
+ /* Release temporary framebuffer. */
+ if (temp_fb != nullptr) {
+ GPU_framebuffer_free(temp_fb);
+ }
+ /* Rebind existing framebuffer */
+ if (prev_fb != nullptr) {
+ GPU_framebuffer_bind(prev_fb);
+ }
}
-#endif
}
void DRW_curves_free()
diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc
index dc791314333..69f123b95f3 100644
--- a/source/blender/draw/intern/draw_hair.cc
+++ b/source/blender/draw/intern/draw_hair.cc
@@ -22,6 +22,7 @@
#include "GPU_batch.h"
#include "GPU_capabilities.h"
#include "GPU_compute.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
@@ -33,25 +34,17 @@
#include "draw_shader.h"
#include "draw_shader_shared.h"
-#ifndef __APPLE__
-# define USE_TRANSFORM_FEEDBACK
-# define USE_COMPUTE_SHADERS
-#endif
-
BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get()
{
-#ifdef USE_COMPUTE_SHADERS
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
return PART_REFINE_SHADER_COMPUTE;
}
-#endif
-#ifdef USE_TRANSFORM_FEEDBACK
- return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
-#endif
+ if (GPU_transform_feedback_support()) {
+ return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
+ }
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
}
-#ifndef USE_TRANSFORM_FEEDBACK
struct ParticleRefineCall {
struct ParticleRefineCall *next;
GPUVertBuf *vbo;
@@ -63,7 +56,6 @@ static ParticleRefineCall *g_tf_calls = nullptr;
static int g_tf_id_offset;
static int g_tf_target_width;
static int g_tf_target_height;
-#endif
static GPUVertBuf *g_dummy_vbo = nullptr;
static GPUTexture *g_dummy_texture = nullptr;
@@ -77,18 +69,20 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement)
void DRW_hair_init(void)
{
-#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS)
- g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW);
-#else
- g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR);
-#endif
+ if (GPU_transform_feedback_support() || GPU_compute_shader_support()) {
+ g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW);
+ }
+ else {
+ g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR);
+ }
if (g_dummy_vbo == nullptr) {
/* initialize vertex format */
GPUVertFormat format = {0};
uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- g_dummy_vbo = GPU_vertbuf_create_with_format(&format);
+ g_dummy_vbo = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
@@ -146,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache
if (final_points_len > 0) {
GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM);
-#ifdef USE_TRANSFORM_FEEDBACK
- DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(
- tf_shader, g_tf_pass, cache->final[subdiv].proc_buf);
-#else
- DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
-
- ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__);
- pr_call->next = g_tf_calls;
- pr_call->vbo = cache->final[subdiv].proc_buf;
- pr_call->shgrp = tf_shgrp;
- pr_call->vert_len = final_points_len;
- g_tf_calls = pr_call;
- DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
- DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
- DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
-#endif
+ DRWShadingGroup *tf_shgrp = nullptr;
+ if (GPU_transform_feedback_support()) {
+ tf_shgrp = DRW_shgroup_transform_feedback_create(
+ tf_shader, g_tf_pass, cache->final[subdiv].proc_buf);
+ }
+ else {
+ tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
+
+ ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__);
+ pr_call->next = g_tf_calls;
+ pr_call->vbo = cache->final[subdiv].proc_buf;
+ pr_call->shgrp = tf_shgrp;
+ pr_call->vert_len = final_points_len;
+ g_tf_calls = pr_call;
+ DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
+ DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
+ DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
+ }
+ BLI_assert(tf_shgrp != nullptr);
drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv);
DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
@@ -306,81 +303,117 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
void DRW_hair_update()
{
-#ifndef USE_TRANSFORM_FEEDBACK
- /**
- * Workaround to transform feedback not working on mac.
- * On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
- *
- * So instead of using transform feedback we render to a texture,
- * read back the result to system memory and re-upload as VBO data.
- * It is really not ideal performance wise, but it is the simplest
- * and the most local workaround that still uses the power of the GPU.
- */
-
- if (g_tf_calls == nullptr) {
- return;
- }
+ if (!GPU_transform_feedback_support()) {
+ /**
+ * Workaround to transform feedback not working on mac.
+ * On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
+ *
+ * So instead of using transform feedback we render to a texture,
+ * read back the result to system memory and re-upload as VBO data.
+ * It is really not ideal performance wise, but it is the simplest
+ * and the most local workaround that still uses the power of the GPU.
+ */
+
+ if (g_tf_calls == nullptr) {
+ return;
+ }
- /* Search ideal buffer size. */
- uint max_size = 0;
- for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
- max_size = max_ii(max_size, pr_call->vert_len);
- }
+ /* Search ideal buffer size. */
+ uint max_size = 0;
+ for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
+ max_size = max_ii(max_size, pr_call->vert_len);
+ }
+
+ /* Create target Texture / Frame-buffer */
+ /* Don't use max size as it can be really heavy and fail.
+ * Do chunks of maximum 2048 * 2048 hair points. */
+ int width = 2048;
+ int height = min_ii(width, 1 + max_size / width);
+ GPUTexture *tex = DRW_texture_pool_query_2d(
+ width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update);
+ g_tf_target_height = height;
+ g_tf_target_width = width;
+
+ GPUFrameBuffer *fb = nullptr;
+ GPU_framebuffer_ensure_config(&fb,
+ {
+ GPU_ATTACHMENT_NONE,
+ GPU_ATTACHMENT_TEXTURE(tex),
+ });
+
+ float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer");
+
+ GPU_framebuffer_bind(fb);
+ while (g_tf_calls != nullptr) {
+ ParticleRefineCall *pr_call = g_tf_calls;
+ g_tf_calls = g_tf_calls->next;
+
+ g_tf_id_offset = 0;
+ while (pr_call->vert_len > 0) {
+ int max_read_px_len = min_ii(width * height, pr_call->vert_len);
+
+ DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
+ /* Read back result to main memory. */
+ GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
+ /* Upload back to VBO. */
+ GPU_vertbuf_use(pr_call->vbo);
+ GPU_vertbuf_update_sub(pr_call->vbo,
+ sizeof(float[4]) * g_tf_id_offset,
+ sizeof(float[4]) * max_read_px_len,
+ data);
+
+ g_tf_id_offset += max_read_px_len;
+ pr_call->vert_len -= max_read_px_len;
+ }
- /* Create target Texture / Frame-buffer */
- /* Don't use max size as it can be really heavy and fail.
- * Do chunks of maximum 2048 * 2048 hair points. */
- int width = 2048;
- int height = min_ii(width, 1 + max_size / width);
- GPUTexture *tex = DRW_texture_pool_query_2d(
- width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update);
- g_tf_target_height = height;
- g_tf_target_width = width;
-
- GPUFrameBuffer *fb = nullptr;
- GPU_framebuffer_ensure_config(&fb,
- {
- GPU_ATTACHMENT_NONE,
- GPU_ATTACHMENT_TEXTURE(tex),
- });
-
- float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer");
-
- GPU_framebuffer_bind(fb);
- while (g_tf_calls != nullptr) {
- ParticleRefineCall *pr_call = g_tf_calls;
- g_tf_calls = g_tf_calls->next;
-
- g_tf_id_offset = 0;
- while (pr_call->vert_len > 0) {
- int max_read_px_len = min_ii(width * height, pr_call->vert_len);
-
- DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
- /* Read back result to main memory. */
- GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
- /* Upload back to VBO. */
- GPU_vertbuf_use(pr_call->vbo);
- GPU_vertbuf_update_sub(pr_call->vbo,
- sizeof(float[4]) * g_tf_id_offset,
- sizeof(float[4]) * max_read_px_len,
- data);
-
- g_tf_id_offset += max_read_px_len;
- pr_call->vert_len -= max_read_px_len;
+ MEM_freeN(pr_call);
}
- MEM_freeN(pr_call);
+ MEM_freeN(data);
+ GPU_framebuffer_free(fb);
}
+ else {
+ /* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid
+ * side-effects from rendering in the active buffer.
+ * We also need to guarantee that a Framebuffer is active to perform any rendering work,
+ * even if there is no output */
+ GPUFrameBuffer *temp_fb = nullptr;
+ GPUFrameBuffer *prev_fb = nullptr;
+ if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
+ if (!GPU_compute_shader_support()) {
+ prev_fb = GPU_framebuffer_active_get();
+ char errorOut[256];
+ /* if the framebuffer is invalid we need a dummy framebuffer to be bound. */
+ if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) {
+ int width = 64;
+ int height = 64;
+ GPUTexture *tex = DRW_texture_pool_query_2d(
+ width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update);
+ g_tf_target_height = height;
+ g_tf_target_width = width;
+
+ GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)});
+
+ GPU_framebuffer_bind(temp_fb);
+ }
+ }
+ }
- MEM_freeN(data);
- GPU_framebuffer_free(fb);
-#else
- /* Just render the pass when using compute shaders or transform feedback. */
- DRW_draw_pass(g_tf_pass);
- if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
- GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+ /* Just render the pass when using compute shaders or transform feedback. */
+ DRW_draw_pass(g_tf_pass);
+ if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
+ GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+
+ /* Release temporary framebuffer. */
+ if (temp_fb != nullptr) {
+ GPU_framebuffer_free(temp_fb);
+ }
+ /* Rebind existing framebuffer */
+ if (prev_fb != nullptr) {
+ GPU_framebuffer_bind(prev_fb);
+ }
}
-#endif
}
void DRW_hair_free(void)
diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
index eb6e800023a..50c37f6397c 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
@@ -174,7 +174,7 @@ static void extract_edge_fac_finish(const MeshRenderData *mr,
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data);
- if (GPU_crappy_amd_driver()) {
+ if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
/* Some AMD drivers strangely crash with VBO's with a one byte format.
* To workaround we reinitialize the VBO with another format and convert
* all bytes to floats. */
@@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format()
{
static GPUVertFormat format = {0};
if (format.attr_len == 0) {
- if (GPU_crappy_amd_driver()) {
+ if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
}
else {
@@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach
uint offset = subdiv_cache->num_subdiv_loops;
for (int i = 0; i < loose_geom.edge_len; i++) {
- if (GPU_crappy_amd_driver()) {
+ if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
float loose_edge_fac[2] = {1.0f, 1.0f};
GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac);
}