Curves: Further split of curves draw code from particles

Extends the changes started in f31c3f8114616bb to completely separate much of the DRW curves code from the particle hair drawing. In the short term this increases duplication, but the idea is to simplify development by making it easier to do larger changes to the new code, and the new system will replace the particle hair at some point. After this, only the shaders themselves are shared. Differential Revision: https://developer.blender.org/D14699
author: Hans Goudey <h.goudey@me.com> 2022-04-22 18:43:48 +0300
committer: Hans Goudey <h.goudey@me.com> 2022-04-22 18:44:01 +0300
commit: 5b87862ddca2cb1b9351a5e250e1743b7245f38b (patch)
tree: 53d6e591709d50bcf77ac444f2cb10c12c1874e9 /source/blender/draw/intern/draw_curves.cc
parent: 8f05a547d539feb4a8da35ee15239fa46ff38083 (diff)
1 files changed, 324 insertions, 0 deletions
diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc
new file mode 100644
index 00000000000..139efdf4f8c
--- /dev/null
+++ b/source/blender/draw/intern/draw_curves.cc
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2017 Blender Foundation. All rights reserved. */
+
+/** \file
+ * \ingroup draw
+ *
+ * \brief Contains procedural GPU hair drawing methods.
+ */
+
+#include "BLI_string_utils.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_customdata_types.h"
+
+#include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_compute.h"
+#include "GPU_material.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_vertex_buffer.h"
+
+#include "DRW_render.h"
+
+#include "draw_hair_private.h"
+#include "draw_shader.h"
+
+#ifndef __APPLE__
+#  define USE_TRANSFORM_FEEDBACK
+#  define USE_COMPUTE_SHADERS
+#endif
+
+BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
+{
+#ifdef USE_COMPUTE_SHADERS
+  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
+    return PART_REFINE_SHADER_COMPUTE;
+  }
+#endif
+#ifdef USE_TRANSFORM_FEEDBACK
+  return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
+#endif
+  return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
+}
+
+#ifndef USE_TRANSFORM_FEEDBACK
+typedef struct CurvesEvalCall {
+  struct CurvesEvalCall *next;
+  GPUVertBuf *vbo;
+  DRWShadingGroup *shgrp;
+  uint vert_len;
+} CurvesEvalCall;
+
+static CurvesEvalCall *g_tf_calls = nullptr;
+static int g_tf_id_offset;
+static int g_tf_target_width;
+static int g_tf_target_height;
+#endif
+
+static GPUVertBuf *g_dummy_vbo = nullptr;
+static GPUTexture *g_dummy_texture = nullptr;
+static DRWPass *g_tf_pass; /* XXX can be a problem with multiple DRWManager in the future */
+
+static GPUShader *curves_eval_shader_get(CurvesEvalShader type)
+{
+  return DRW_shader_curves_refine_get(type, drw_curves_shader_type_get());
+}
+
+void DRW_curves_init(void)
+{
+  /* Initialize legacy hair too, to avoid verbosity in callers. */
+  DRW_hair_init();
+
+#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS)
+  g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0);
+#else
+  g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR);
+#endif
+
+  if (g_dummy_vbo == nullptr) {
+    /* initialize vertex format */
+    GPUVertFormat format = {0};
+    uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+
+    g_dummy_vbo = GPU_vertbuf_create_with_format(&format);
+
+    const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+    GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
+    GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert);
+    /* Create vbo immediately to bind to texture buffer. */
+    GPU_vertbuf_use(g_dummy_vbo);
+
+    g_dummy_texture = GPU_texture_create_from_vertbuf("hair_dummy_attr", g_dummy_vbo);
+  }
+}
+
+static void drw_curves_cache_shgrp_attach_resources(DRWShadingGroup *shgrp,
+                                                    CurvesEvalCache *cache,
+                                                    const int subdiv)
+{
+  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", cache->point_tex);
+  DRW_shgroup_uniform_texture(shgrp, "hairStrandBuffer", cache->strand_tex);
+  DRW_shgroup_uniform_texture(shgrp, "hairStrandSegBuffer", cache->strand_seg_tex);
+  DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &cache->final[subdiv].strands_res, 1);
+}
+
+static void drw_curves_cache_update_compute(CurvesEvalCache *cache, const int subdiv)
+{
+  const int strands_len = cache->strands_len;
+  const int final_points_len = cache->final[subdiv].strands_res * strands_len;
+  if (final_points_len > 0) {
+    GPUShader *shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM);
+    DRWShadingGroup *shgrp = DRW_shgroup_create(shader, g_tf_pass);
+    drw_curves_cache_shgrp_attach_resources(shgrp, cache, subdiv);
+    DRW_shgroup_vertex_buffer(shgrp, "posTime", cache->final[subdiv].proc_buf);
+
+    const int max_strands_per_call = GPU_max_work_group_count(0);
+    int strands_start = 0;
+    while (strands_start < strands_len) {
+      int batch_strands_len = MIN2(strands_len - strands_start, max_strands_per_call);
+      DRWShadingGroup *subgroup = DRW_shgroup_create_sub(shgrp);
+      DRW_shgroup_uniform_int_copy(subgroup, "hairStrandOffset", strands_start);
+      DRW_shgroup_call_compute(subgroup, batch_strands_len, cache->final[subdiv].strands_res, 1);
+      strands_start += batch_strands_len;
+    }
+  }
+}
+
+static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, const int subdiv)
+{
+  const int final_points_len = cache->final[subdiv].strands_res * cache->strands_len;
+  if (final_points_len > 0) {
+    GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM);
+
+#ifdef USE_TRANSFORM_FEEDBACK
+    DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(
+        tf_shader, g_tf_pass, cache->final[subdiv].proc_buf);
+#else
+    DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
+
+    CurvesEvalCall *pr_call = MEM_mallocN(sizeof(*pr_call), __func__);
+    pr_call->next = g_tf_calls;
+    pr_call->vbo = cache->final[subdiv].proc_buf;
+    pr_call->shgrp = tf_shgrp;
+    pr_call->vert_len = final_points_len;
+    g_tf_calls = pr_call;
+    DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
+    DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
+    DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
+#endif
+
+    drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv);
+    DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
+  }
+}
+
+static CurvesEvalCache *drw_curves_cache_get(Object *object,
+                                             GPUMaterial *gpu_material,
+                                             int subdiv,
+                                             int thickness_res)
+{
+  CurvesEvalCache *cache;
+  bool update = curves_ensure_procedural_data(object, &cache, gpu_material, subdiv, thickness_res);
+
+  if (update) {
+    if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
+      drw_curves_cache_update_compute(cache, subdiv);
+    }
+    else {
+      drw_curves_cache_update_transform_feedback(cache, subdiv);
+    }
+  }
+  return cache;
+}
+
+GPUVertBuf *DRW_curves_pos_buffer_get(Object *object)
+{
+  const DRWContextState *draw_ctx = DRW_context_state_get();
+  Scene *scene = draw_ctx->scene;
+
+  int subdiv = scene->r.hair_subdiv;
+  int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2;
+
+  CurvesEvalCache *cache = drw_curves_cache_get(object, nullptr, subdiv, thickness_res);
+
+  return cache->final[subdiv].proc_buf;
+}
+
+DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object,
+                                               DRWShadingGroup *shgrp_parent,
+                                               GPUMaterial *gpu_material)
+{
+  const DRWContextState *draw_ctx = DRW_context_state_get();
+  Scene *scene = draw_ctx->scene;
+
+  int subdiv = scene->r.hair_subdiv;
+  int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2;
+
+  CurvesEvalCache *curves_cache = drw_curves_cache_get(
+      object, gpu_material, subdiv, thickness_res);
+
+  DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent);
+
+  /* Fix issue with certain driver not drawing anything if there is no texture bound to
+   * "ac", "au", "u" or "c". */
+  DRW_shgroup_uniform_texture(shgrp, "u", g_dummy_texture);
+  DRW_shgroup_uniform_texture(shgrp, "au", g_dummy_texture);
+  DRW_shgroup_uniform_texture(shgrp, "c", g_dummy_texture);
+  DRW_shgroup_uniform_texture(shgrp, "ac", g_dummy_texture);
+
+  /* TODO: Generalize radius implementation for curves data type. */
+  float hair_rad_shape = 1.0f;
+  float hair_rad_root = 0.005f;
+  float hair_rad_tip = 0.0f;
+  bool hair_close_tip = true;
+
+  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", curves_cache->final[subdiv].proc_tex);
+  if (curves_cache->length_tex) {
+    DRW_shgroup_uniform_texture(shgrp, "hairLen", curves_cache->length_tex);
+  }
+  DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &curves_cache->final[subdiv].strands_res, 1);
+  DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res);
+  DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape);
+  DRW_shgroup_uniform_mat4_copy(shgrp, "hairDupliMatrix", object->obmat);
+  DRW_shgroup_uniform_float_copy(shgrp, "hairRadRoot", hair_rad_root);
+  DRW_shgroup_uniform_float_copy(shgrp, "hairRadTip", hair_rad_tip);
+  DRW_shgroup_uniform_bool_copy(shgrp, "hairCloseTip", hair_close_tip);
+  /* TODO(fclem): Until we have a better way to cull the curves and render with orco, bypass
+   * culling test. */
+  GPUBatch *geom = curves_cache->final[subdiv].proc_hairs[thickness_res - 1];
+  DRW_shgroup_call_no_cull(shgrp, geom, object);
+
+  return shgrp;
+}
+
+void DRW_curves_update()
+{
+  /* Update legacy hair too, to avoid verbosity in callers. */
+  DRW_hair_update();
+
+#ifndef USE_TRANSFORM_FEEDBACK
+  /**
+   * Workaround to transform feedback not working on mac.
+   * On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
+   *
+   * So instead of using transform feedback we render to a texture,
+   * read back the result to system memory and re-upload as VBO data.
+   * It is really not ideal performance wise, but it is the simplest
+   * and the most local workaround that still uses the power of the GPU.
+   */
+
+  if (g_tf_calls == nullptr) {
+    return;
+  }
+
+  /* Search ideal buffer size. */
+  uint max_size = 0;
+  for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
+    max_size = max_ii(max_size, pr_call->vert_len);
+  }
+
+  /* Create target Texture / Frame-buffer */
+  /* Don't use max size as it can be really heavy and fail.
+   * Do chunks of maximum 2048 * 2048 hair points. */
+  int width = 2048;
+  int height = min_ii(width, 1 + max_size / width);
+  GPUTexture *tex = DRW_texture_pool_query_2d(
+      width, height, GPU_RGBA32F, (void *)DRW_curves_update);
+  g_tf_target_height = height;
+  g_tf_target_width = width;
+
+  GPUFrameBuffer *fb = nullptr;
+  GPU_framebuffer_ensure_config(&fb,
+                                {
+                                    GPU_ATTACHMENT_NONE,
+                                    GPU_ATTACHMENT_TEXTURE(tex),
+                                });
+
+  float *data = MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer");
+
+  GPU_framebuffer_bind(fb);
+  while (g_tf_calls != nullptr) {
+    CurvesEvalCall *pr_call = g_tf_calls;
+    g_tf_calls = g_tf_calls->next;
+
+    g_tf_id_offset = 0;
+    while (pr_call->vert_len > 0) {
+      int max_read_px_len = min_ii(width * height, pr_call->vert_len);
+
+      DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
+      /* Readback result to main memory. */
+      GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
+      /* Upload back to VBO. */
+      GPU_vertbuf_use(pr_call->vbo);
+      GPU_vertbuf_update_sub(pr_call->vbo,
+                             sizeof(float[4]) * g_tf_id_offset,
+                             sizeof(float[4]) * max_read_px_len,
+                             data);
+
+      g_tf_id_offset += max_read_px_len;
+      pr_call->vert_len -= max_read_px_len;
+    }
+
+    MEM_freeN(pr_call);
+  }
+
+  MEM_freeN(data);
+  GPU_framebuffer_free(fb);
+#else
+  /* Just render the pass when using compute shaders or transform feedback. */
+  DRW_draw_pass(g_tf_pass);
+  if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
+    GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+  }
+#endif
+}
+
+void DRW_curves_free()
+{
+  DRW_hair_free();
+
+  GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo);
+  DRW_TEXTURE_FREE_SAFE(g_dummy_texture);
+}
author	Hans Goudey <h.goudey@me.com>	2022-04-22 18:43:48 +0300
committer	Hans Goudey <h.goudey@me.com>	2022-04-22 18:44:01 +0300
commit	5b87862ddca2cb1b9351a5e250e1743b7245f38b (patch)
tree	53d6e591709d50bcf77ac444f2cb10c12c1874e9 /source/blender/draw/intern/draw_curves.cc
parent	8f05a547d539feb4a8da35ee15239fa46ff38083 (diff)