Paint Tile streaming.

author: Jeroen Bakker <jeroen@blender.org> 2022-10-11 14:45:19 +0300
committer: Jeroen Bakker <jeroen@blender.org> 2022-10-11 14:45:19 +0300
commit: 82d9617dab040400a13f142949b55c55a8491ce9 (patch)
tree: c29cd1e51b6a17ea1b7893e410a22d902edfaa63
parent: d6c1b15527b6f938787b50f8cfadc9d9f82a4ab1 (diff)
11 files changed, 392 insertions, 65 deletions
diff --git a/source/blender/blenkernel/BKE_pbvh_pixels.hh b/source/blender/blenkernel/BKE_pbvh_pixels.hh
index 282aebcbcdc..bff6bee41e4 100644
--- a/source/blender/blenkernel/BKE_pbvh_pixels.hh
+++ b/source/blender/blenkernel/BKE_pbvh_pixels.hh
@@ -21,6 +21,10 @@
 
 namespace blender::bke::pbvh::pixels {
 
+/* During GPU painting the texture is split into sub-tiles. This constant contains the size of
+ * a sub-tile (both width and height, in pixels). */
+const int32_t TEXTURE_STREAMING_TILE_SIZE = 1024;
+
 /**
  * Data shared between pixels that belong to the same triangle.
  *
@@ -101,11 +105,15 @@ struct UDIMTilePixels {
 
   Vector<PackedPixelRow> pixel_rows;
   int64_t gpu_buffer_offset;
+  /* Sub-tile range this node can paint on; sub-tile size is #TEXTURE_STREAMING_TILE_SIZE. */
+  /* TODO: use list of sub_tile_ids to not overcommit texture usage. */
+  rcti gpu_sub_tiles;
 
   UDIMTilePixels()
   {
     flags.dirty = false;
     BLI_rcti_init_minmax(&dirty_region);
+    BLI_rcti_init_minmax(&gpu_sub_tiles);
   }
 
   void mark_dirty(const PackedPixelRow &pixel_row)
@@ -121,6 +129,8 @@ struct UDIMTilePixels {
     BLI_rcti_init_minmax(&dirty_region);
     flags.dirty = false;
   }
+
+  void init_gpu_sub_tiles();
 };
 
 struct UDIMTileUndo {
@@ -215,6 +225,7 @@ struct NodeData {
     triangles.ensure_gpu_buffer();
     if (gpu_buffers.pixels == nullptr) {
       build_pixels_gpu_buffer();
+      init_gpu_sub_tiles();
     }
   }
 
@@ -226,6 +237,7 @@ struct NodeData {
 
  private:
   void build_pixels_gpu_buffer();
+  void init_gpu_sub_tiles();
 };
 
 NodeData &BKE_pbvh_pixels_node_data_get(PBVHNode &node);
diff --git a/source/blender/blenkernel/intern/paint.cc b/source/blender/blenkernel/intern/paint.cc
index f7f5f7e411a..642fd9fa00d 100644
--- a/source/blender/blenkernel/intern/paint.cc
+++ b/source/blender/blenkernel/intern/paint.cc
@@ -1393,6 +1393,13 @@ void BKE_sculptsession_free_vwpaint_data(SculptSession *ss)
   MEM_SAFE_FREE(gmap->poly_map_mem);
 }
 
+/* TODO: We should move the gpu batches to BKE.
+ *
+ * Forward declaration of the function that frees the GPU texture painting data of a sculpt
+ * session; it is implemented in the sculpt_paint editor module. The call below is still disabled
+ * and `ss` is marked as unused, so this function currently does nothing. */
+void SCULPT_paint_image_sculpt_data_free(SculptSession *ss);
+static void bke_sculptsession_free_texture_paint_data(SculptSession *UNUSED(ss))
+{
+  // SCULPT_paint_image_sculpt_data_free(ss);
+}
+
 /**
  * Write out the sculpt dynamic-topology #BMesh to the #Mesh.
  */
@@ -1533,6 +1540,7 @@ void BKE_sculptsession_free(Object *ob)
     }
 
     BKE_sculptsession_free_vwpaint_data(ob->sculpt);
+    bke_sculptsession_free_texture_paint_data(ob->sculpt);
 
     MEM_SAFE_FREE(ss->last_paint_canvas_key);
 
diff --git a/source/blender/blenkernel/intern/pbvh_pixels.cc b/source/blender/blenkernel/intern/pbvh_pixels.cc
index db1a7514b15..a87a29019b3 100644
--- a/source/blender/blenkernel/intern/pbvh_pixels.cc
+++ b/source/blender/blenkernel/intern/pbvh_pixels.cc
@@ -77,6 +77,26 @@ void NodeData::build_pixels_gpu_buffer()
       elem_len * sizeof(PackedPixelRow), elements.data(), GPU_USAGE_STATIC, __func__);
 }
 
+/**
+ * Recompute #gpu_sub_tiles: the inclusive range of sub-tile indices (image coordinate divided by
+ * #TEXTURE_STREAMING_TILE_SIZE) that contain at least one pixel of #pixel_rows.
+ *
+ * When there are no pixels the range is left in its min/max-initialized (empty) state.
+ */
+void UDIMTilePixels::init_gpu_sub_tiles()
+{
+  BLI_rcti_init_minmax(&gpu_sub_tiles);
+  for (const PackedPixelRow &elements : pixel_rows) {
+    if (elements.num_pixels == 0) {
+      /* An empty row doesn't touch any sub-tile. */
+      continue;
+    }
+
+    /* A row covers the pixels `start.x` up to and including `start.x + num_pixels - 1` of a single
+     * scan-line. Use the last covered pixel (not one past it) so rows that end exactly on a
+     * sub-tile border don't mark a neighboring sub-tile that contains none of their pixels. */
+    int2 subtile_from = int2(elements.start_image_coordinate / TEXTURE_STREAMING_TILE_SIZE);
+    int2 coord_to = int2(elements.start_image_coordinate) + int2(elements.num_pixels - 1, 0);
+    int2 subtile_to = int2(coord_to / TEXTURE_STREAMING_TILE_SIZE);
+
+    BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_from);
+    BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_to);
+  }
+}
+
+/** Refresh the GPU sub-tile range of every UDIM tile that is stored in this node. */
+void NodeData::init_gpu_sub_tiles()
+{
+  for (UDIMTilePixels &udim_tile : tiles) {
+    udim_tile.init_gpu_sub_tiles();
+  }
+}
+
 /**
  * During debugging this check could be enabled.
  * It will write to each image pixel that is covered by the PBVH.
diff --git a/source/blender/editors/sculpt_paint/sculpt_intern.h b/source/blender/editors/sculpt_paint/sculpt_intern.h
index ac7a43e7fc2..b3b8a30f2aa 100644
--- a/source/blender/editors/sculpt_paint/sculpt_intern.h
+++ b/source/blender/editors/sculpt_paint/sculpt_intern.h
@@ -1770,6 +1770,7 @@ void SCULPT_paint_image_batches_flush(struct PaintModeSettings *paint_mode_setti
 void SCULPT_paint_image_batches_finalize(struct PaintModeSettings *paint_mode_settings,
                                          struct Sculpt *sd,
                                          struct Object *ob);
+void SCULPT_paint_image_sculpt_data_free(SculptSession *ss);
 
 /* Smear Brush. */
 void SCULPT_do_smear_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode);
diff --git a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
index 99353b934fd..a44b9141578 100644
--- a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
+++ b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
@@ -526,7 +526,6 @@ static void init_paint_brush_alpha(const Brush &brush, PaintBrushData &r_paint_b
   r_paint_brush.alpha = brush.alpha;
 }
 
-/* TODO: Currently only spherical is supported. */
 static void init_paint_brush_test(const SculptSession &ss, PaintBrushData &r_paint_brush)
 {
   r_paint_brush.test.symm_rot_mat_inv = ss.cache->symm_rot_mat_inv;
@@ -547,6 +546,237 @@ static void init_paint_brush(const SculptSession &ss,
   init_paint_brush_falloff(brush, r_paint_brush);
 }
 
+/**
+ * Tiles are split on the GPU in sub-tiles.
+ *
+ * Sub tiles are used to reduce the needed memory on the GPU.
+ * - Only tiles that are painted on are loaded in memory, painted on and merged back to the actual
+ * texture.
+ */
+
+/**
+ * Owns a 3D GPU texture of `Size` x `Size` x `Depth` texels where each layer (Z) stores a single
+ * sub-tile, and a storage buffer with the #PaintTileData of all known sub-tiles so shaders can
+ * find the layer of a (UDIM tile, sub-tile) pair.
+ *
+ * `paint_tiles_` and `infos_` are parallel vectors: the elements at the same index describe the
+ * same sub-tile. `layer_lookup_[layer]` contains that index, or #LayerIdUnused when the layer is
+ * free.
+ */
+template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
+  struct Info {
+    struct {
+      bool in_use : 1;
+      /* Does this sub tile need to be updated (CPU->GPU transfer). */
+      bool needs_update : 1;
+      bool should_be_removed : 1;
+    } flags;
+  };
+  /* Sentinel values stored in #PaintTileData.layer_id and #layer_lookup_. */
+  static constexpr int32_t LayerIdUnused = -1;
+  static constexpr int32_t LayerIdMarkRemoval = -2;
+
+  Vector<PaintTileData> paint_tiles_;
+  Vector<Info> infos_;
+
+  std::array<int32_t, Depth> layer_lookup_;
+
+  GPUTexture *gpu_texture_ = nullptr;
+  GPUStorageBuf *tile_buf_ = nullptr;
+  /* Size in bytes of #tile_buf_, 0 when not allocated. */
+  int64_t tile_buf_size_ = 0;
+
+  /** Mark all layers as free. */
+  void clear_layer_lookup()
+  {
+    for (int i = 0; i < Depth; i++) {
+      layer_lookup_[i] = LayerIdUnused;
+    }
+  }
+
+  /** Rebuild #layer_lookup_ from the layer ids stored in #paint_tiles_. */
+  void rebuild_layer_lookup()
+  {
+    clear_layer_lookup();
+    for (int64_t index : paint_tiles_.index_range()) {
+      const int32_t layer_id = paint_tiles_[index].layer_id;
+      if (layer_id != LayerIdUnused) {
+        layer_lookup_[layer_id] = index;
+      }
+    }
+  }
+
+ public:
+  GPUSubTileTexture()
+  {
+    clear_layer_lookup();
+  }
+
+  /* The GPU texture and storage buffer are owned by the instance and freed in the destructor.
+   * A copy would free the same handles twice. */
+  GPUSubTileTexture(const GPUSubTileTexture &) = delete;
+  GPUSubTileTexture &operator=(const GPUSubTileTexture &) = delete;
+
+  ~GPUSubTileTexture()
+  {
+    if (gpu_texture_) {
+      GPU_texture_free(gpu_texture_);
+      gpu_texture_ = nullptr;
+    }
+
+    if (tile_buf_) {
+      GPU_storagebuf_free(tile_buf_);
+      tile_buf_ = nullptr;
+      tile_buf_size_ = 0;
+    }
+  }
+
+  /** Flag all known sub-tiles as not in use. Use #mark_usage to flag them as used again. */
+  void reset_usage()
+  {
+    printf("%s\n", __func__);
+    for (Info &info : infos_) {
+      info.flags.in_use = false;
+    }
+  }
+
+  /**
+   * Flag the given sub-tile as in use. A sub-tile that isn't known yet is added without a layer
+   * (see #assign_layer_ids) and is flagged to be uploaded to the GPU.
+   */
+  void mark_usage(TileNumber tile_number, int2 sub_tile_id)
+  {
+    for (int index : paint_tiles_.index_range()) {
+      PaintTileData &tile = paint_tiles_[index];
+      if (tile.tile_number == tile_number && tile.sub_tile_id == sub_tile_id) {
+        Info &info = infos_[index];
+        if (!info.flags.in_use) {
+          printf("%s: mark existing {tile:%d, sub_tile:%d,%d}\n",
+                 __func__,
+                 tile_number,
+                 UNPACK2(sub_tile_id));
+        }
+        info.flags.in_use = true;
+        return;
+      }
+    }
+
+    /* Tile not yet added, add a new one. */
+    Info info;
+    info.flags.in_use = true;
+    info.flags.needs_update = true;
+    info.flags.should_be_removed = false;
+    infos_.append(info);
+
+    PaintTileData tile;
+    tile.tile_number = tile_number;
+    tile.sub_tile_id = sub_tile_id;
+    tile.layer_id = LayerIdUnused;
+    paint_tiles_.append(tile);
+
+    printf(
+        "%s: mark new {tile:%d, sub_tile:%d,%d}\n", __func__, tile_number, UNPACK2(sub_tile_id));
+  }
+
+  /** Remove all sub tiles that are currently flagged not to be used (flags.in_use = false). */
+  void remove_unused()
+  {
+    for (int64_t index : paint_tiles_.index_range()) {
+      Info &info = infos_[index];
+      PaintTileData &tile = paint_tiles_[index];
+
+      info.flags.should_be_removed = !info.flags.in_use;
+      if (!info.flags.should_be_removed) {
+        continue;
+      }
+
+      if (tile.layer_id != LayerIdUnused) {
+        printf("%s: remove sub tile at layer %d\n", __func__, tile.layer_id);
+      }
+      tile.layer_id = LayerIdMarkRemoval;
+    }
+
+    /* Both vectors are compacted using the same selection so they stay parallel. */
+    infos_.remove_if([&](Info &info) { return info.flags.should_be_removed; });
+    paint_tiles_.remove_if(
+        [&](PaintTileData &tile) { return tile.layer_id == LayerIdMarkRemoval; });
+
+    /* Removing elements shifts the indices of the remaining sub-tiles. The lookup stores these
+     * indices, so it has to be rebuilt; otherwise it would reference other (or non-existing)
+     * elements. */
+    rebuild_layer_lookup();
+  }
+
+  /**
+   * Assign a free layer to every sub-tile that doesn't have one. When the texture is full the
+   * remaining sub-tiles are left without a layer.
+   */
+  void assign_layer_ids()
+  {
+    for (int64_t index : paint_tiles_.index_range()) {
+      PaintTileData &tile = paint_tiles_[index];
+
+      if (tile.layer_id != LayerIdUnused) {
+        continue;
+      }
+
+      const int layer_id = first_empty_layer_id();
+      if (layer_id == LayerIdUnused) {
+        /* No free layer left. Don't use the sentinel as an index into the lookup. */
+        break;
+      }
+
+      tile.layer_id = layer_id;
+      layer_lookup_[layer_id] = index;
+      printf("%s: assign {tile:%d, sub_tile:%d,%d} to layer %d\n",
+             __func__,
+             tile.tile_number,
+             UNPACK2(tile.sub_tile_id),
+             tile.layer_id);
+    }
+  }
+
+  /** Return the first layer that isn't assigned, or #LayerIdUnused when all layers are taken. */
+  int first_empty_layer_id() const
+  {
+    for (int i = 0; i < Depth; i++) {
+      if (layer_lookup_[i] == LayerIdUnused) {
+        return i;
+      }
+    }
+
+    BLI_assert_unreachable();
+    return LayerIdUnused;
+  }
+
+  /** Create the 3D texture when it doesn't exist yet. */
+  void ensure_gpu_texture()
+  {
+    if (gpu_texture_ != nullptr) {
+      return;
+    }
+    gpu_texture_ = GPU_texture_create_3d(
+        "GPUSubTileTexture", Size, Size, Depth, 1, GPU_RGBA16F, GPU_DATA_FLOAT, nullptr);
+  }
+
+  /**
+   * Upload the sub-tiles of the given UDIM tile that are flagged `needs_update` to their layer.
+   * The image buffer isn't read yet; a zero-initialized buffer is uploaded.
+   */
+  void update_gpu_texture(TileNumber tile_number, ImBuf &UNUSED(image_buffer))
+  {
+    BLI_assert(gpu_texture_);
+    float *buffer = nullptr;
+    for (int64_t index : infos_.index_range()) {
+      Info &info = infos_[index];
+      PaintTileData &tile = paint_tiles_[index];
+      if (!info.flags.needs_update) {
+        continue;
+      }
+
+      if (tile.tile_number != tile_number) {
+        continue;
+      }
+
+      if (tile.layer_id == LayerIdUnused) {
+        /* No layer could be assigned; keep `needs_update` set so it is retried later. */
+        continue;
+      }
+
+      if (buffer == nullptr) {
+        /* Start with `sizeof` so the multiplication is done using `size_t`. */
+        buffer = static_cast<float *>(MEM_callocN(sizeof(float) * 4 * Size * Size, __func__));
+      }
+
+      /* TODO: Copy correct data from ImBuf. */
+
+      GPU_texture_update_sub(
+          gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
+      info.flags.needs_update = false;
+    }
+
+    if (buffer) {
+      MEM_freeN(buffer);
+    }
+  }
+
+  GPUTexture *gpu_texture_get()
+  {
+    return gpu_texture_;
+  }
+
+  /** Make sure the storage buffer exists and matches the capacity of #paint_tiles_. */
+  void ensure_tile_buf()
+  {
+    int64_t needed_size = paint_tiles_.capacity() * sizeof(PaintTileData);
+
+    /* Reuse previous buffer only when exact size, due to potential read out of bound errors. */
+    if (tile_buf_ && tile_buf_size_ == needed_size) {
+      return;
+    }
+
+    if (tile_buf_) {
+      GPU_storagebuf_free(tile_buf_);
+      tile_buf_ = nullptr;
+    }
+    tile_buf_ = GPU_storagebuf_create(needed_size);
+    /* Track the allocated size, otherwise the reuse check above can never succeed. */
+    tile_buf_size_ = needed_size;
+  }
+
+  /** Upload #paint_tiles_ to the storage buffer. Requires #ensure_tile_buf to be called first. */
+  void update_tile_buf()
+  {
+    BLI_assert(tile_buf_);
+    GPU_storagebuf_update(tile_buf_, paint_tiles_.data());
+  }
+
+  GPUStorageBuf *tile_buf_get()
+  {
+    BLI_assert(tile_buf_);
+    return tile_buf_;
+  }
+
+  int32_t paint_tiles_len()
+  {
+    return paint_tiles_.size();
+  }
+
+  /** Bind the sub-tile texture, the tile buffer and its length to the given shader. */
+  void bind(GPUShader *shader)
+  {
+    GPU_texture_image_bind(gpu_texture_get(),
+                           GPU_shader_get_texture_binding(shader, "paint_tiles_img"));
+    GPU_storagebuf_bind(tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf"));
+    GPU_shader_uniform_1i(shader, "paint_tile_buf_len", paint_tiles_len());
+  }
+};
+
 struct GPUSculptPaintData {
   Vector<PaintStepData> steps;
   GPUStorageBuf *step_buf = nullptr;
@@ -554,7 +784,7 @@ struct GPUSculptPaintData {
   GPUStorageBuf *vert_coord_buf = nullptr;
   GPUUniformBuf *paint_brush_buf = nullptr;
 
-  GPUTexture *tile_texture = nullptr;
+  GPUSubTileTexture<TEXTURE_STREAMING_TILE_SIZE> tile_texture;
 
   ~GPUSculptPaintData()
   {
@@ -572,18 +802,13 @@ struct GPUSculptPaintData {
       GPU_storagebuf_free(step_buf);
       step_buf = nullptr;
     }
-
-    if (tile_texture) {
-      GPU_texture_free(tile_texture);
-      tile_texture = nullptr;
-    }
   }
 
   void update_step_buf()
   {
     int requested_size = sizeof(PaintStepData) * steps.size();
-    /* Reallocate buffer when it doesn't fit, or is to big to correct reading from uninitialized
-     * memory. */
+    /* Reallocate the buffer when it doesn't fit, or when it is too big, to avoid reading from
+     * uninitialized memory. */
     const bool reallocate_buf = (requested_size > step_buf_alloc_size) ||
                                 (sizeof(PaintStepData) * steps.capacity() < step_buf_alloc_size);
 
@@ -623,24 +848,13 @@ struct GPUSculptPaintData {
 
     GPU_uniformbuf_update(paint_brush_buf, &paint_brush);
   }
-
-  void ensure_tile_texture(const int2 resolution)
-  {
-    if (tile_texture == nullptr || GPU_texture_width(tile_texture) != resolution.x ||
-        GPU_texture_height(tile_texture) != resolution.y) {
-      if (tile_texture) {
-        GPU_texture_free(tile_texture);
-        tile_texture = nullptr;
-      }
-      tile_texture = GPU_texture_create_2d(__func__, UNPACK2(resolution), 1, GPU_RGBA16F, nullptr);
-    }
-  }
 };
 
 static void ensure_gpu_buffers(TexturePaintingUserData &data)
 {
   SculptSession &ss = *data.ob->sculpt;
   if (!ss.mode.texture_paint.gpu_data) {
+    printf("%s: new gpu_data\n", __func__);
     ss.mode.texture_paint.gpu_data = MEM_new<GPUSculptPaintData>(__func__);
   }
 
@@ -649,6 +863,7 @@ static void ensure_gpu_buffers(TexturePaintingUserData &data)
   if (paint_data.steps.is_empty()) {
     PBVH *pbvh = ss.pbvh;
     BKE_pbvh_frame_selection_clear(pbvh);
+    paint_data.tile_texture.reset_usage();
   }
 
   for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) {
@@ -668,20 +883,14 @@ static BrushVariationFlags determine_shader_variation_flags(const Brush &brush)
   return result;
 }
 
-// TODO: Currently only working on a copy of the actual data. In most use cases this isn't needed
-// and can we paint directly on the target gpu target.
 static void gpu_painting_paint_step(TexturePaintingUserData &data,
                                     GPUSculptPaintData &batches,
                                     TileNumber tile_number,
-                                    ImBuf *image_buffer,
                                     int2 paint_step_range)
 {
   BrushVariationFlags variation_flags = determine_shader_variation_flags(*data.brush);
   GPUShader *shader = SCULPT_shader_paint_image_get(variation_flags);
 
-  batches.ensure_tile_texture(int2(image_buffer->x, image_buffer->y));
-  bool texture_needs_clearing = true;
-
   /* Dispatch all nodes that paint on the active tile. */
   for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) {
     NodeData &node_data = BKE_pbvh_pixels_node_data_get(*node);
@@ -690,16 +899,10 @@ static void gpu_painting_paint_step(TexturePaintingUserData &data,
         continue;
       }
 
-      /* Only clear the texture when it is used for the first time. */
-      if (texture_needs_clearing) {
-        // Copy from image buffer?
-        GPU_texture_clear(batches.tile_texture, GPU_DATA_FLOAT, float4(0.0f, 0.0f, 0.0f, 1.0f));
-        texture_needs_clearing = false;
-      }
-
       GPU_shader_bind(shader);
-      GPU_texture_image_bind(batches.tile_texture,
-                             GPU_shader_get_texture_binding(shader, "out_img"));
+
+      batches.tile_texture.bind(shader);
+
       GPU_storagebuf_bind(batches.step_buf, GPU_shader_get_ssbo(shader, "paint_step_buf"));
       GPU_shader_uniform_2iv(shader, "paint_step_range", paint_step_range);
       GPU_uniformbuf_bind(batches.paint_brush_buf,
@@ -724,17 +927,16 @@ static void gpu_painting_paint_step(TexturePaintingUserData &data,
   }
 }
 
-static void gpu_painting_image_merge(TexturePaintingUserData &UNUSED(data),
+static void gpu_painting_image_merge(GPUSculptPaintData &batches,
                                      Image &image,
                                      ImageUser &image_user,
-                                     ImBuf &image_buffer,
-                                     GPUTexture *paint_tex)
+                                     ImBuf &image_buffer)
 {
   GPUTexture *canvas_tex = BKE_image_get_gpu_texture(&image, &image_user, &image_buffer);
   GPUShader *shader = SCULPT_shader_paint_image_merge_get();
   GPU_shader_bind(shader);
-  GPU_texture_image_bind(paint_tex, GPU_shader_get_texture_binding(shader, "in_paint_img"));
-  GPU_texture_image_bind(canvas_tex, GPU_shader_get_texture_binding(shader, "out_img"));
+  batches.tile_texture.bind(shader);
+  GPU_texture_image_bind(canvas_tex, GPU_shader_get_texture_binding(shader, "texture_img"));
   GPU_compute_dispatch(shader, image_buffer.x, image_buffer.y, 1);
 }
 
@@ -768,6 +970,28 @@ static void dispatch_gpu_painting(TexturePaintingUserData &data)
   batches.steps.append(paint_step);
 }
 
+/**
+ * Flag every GPU sub-tile that is covered by the nodes of this brush step as being in use.
+ *
+ * TODO: This should be done based on the frame_selection nodes, otherwise we might be over
+ * committing.
+ */
+static void paint_tiles_mark_used(TexturePaintingUserData &data)
+{
+  SculptSession &session = *data.ob->sculpt;
+  GPUSculptPaintData &paint_data = *static_cast<GPUSculptPaintData *>(
+      session.mode.texture_paint.gpu_data);
+  MutableSpan<PBVHNode *> nodes(data.nodes, data.nodes_len);
+
+  for (PBVHNode *pbvh_node : nodes) {
+    NodeData &pixels = BKE_pbvh_pixels_node_data_get(*pbvh_node);
+    for (UDIMTilePixels &udim_tile : pixels.tiles) {
+      /* Inclusive range of sub-tiles; empty when `xmin > xmax` or `ymin > ymax`. */
+      const rcti &range = udim_tile.gpu_sub_tiles;
+      for (int x = range.xmin; x <= range.xmax; x++) {
+        for (int y = range.ymin; y <= range.ymax; y++) {
+          paint_data.tile_texture.mark_usage(udim_tile.tile_number, int2(x, y));
+        }
+      }
+    }
+  }
+}
+
+/** Mark all nodes that are used when drawing this frame. */
 static void update_frame_selection(TexturePaintingUserData &data)
 {
   for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) {
@@ -804,6 +1028,11 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data)
   batches.update_step_buf();
   batches.ensure_vert_coord_buf(ss);
   batches.ensure_paint_brush_buf(ss, *data.brush);
+  batches.tile_texture.ensure_gpu_texture();
+  batches.tile_texture.remove_unused();
+  batches.tile_texture.assign_layer_ids();
+  batches.tile_texture.ensure_tile_buf();
+  batches.tile_texture.update_tile_buf();
 
   Image &image = *data.image_data.image;
   ImageUser local_image_user = *data.image_data.image_user;
@@ -817,10 +1046,11 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data)
       continue;
     }
 
+    batches.tile_texture.update_gpu_texture(tile_number, *image_buffer);
+
     GPU_debug_group_begin("Paint tile");
-    gpu_painting_paint_step(data, batches, tile_number, image_buffer, paint_step_range);
-    gpu_painting_image_merge(
-        data, *data.image_data.image, local_image_user, *image_buffer, batches.tile_texture);
+    gpu_painting_paint_step(data, batches, tile_number, paint_step_range);
+    gpu_painting_image_merge(batches, *data.image_data.image, local_image_user, *image_buffer);
     GPU_debug_group_end();
 
     BKE_image_release_ibuf(data.image_data.image, image_buffer, nullptr);
@@ -899,6 +1129,7 @@ void SCULPT_do_paint_brush_image(
     ensure_gpu_buffers(data);
     update_frame_selection(data);
     dispatch_gpu_painting(data);
+    paint_tiles_mark_used(data);
   }
   else {
     TaskParallelSettings settings;
@@ -932,11 +1163,11 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings,
   }
 
   if (ImageData::init_active_image(ob, &data.image_data, paint_mode_settings)) {
-    TIMEIT_START(paint_image_gpu);
+    // TIMEIT_START(paint_image_gpu);
     GPU_debug_group_begin("SCULPT_paint_brush");
     dispatch_gpu_batches(data);
     GPU_debug_group_end();
-    TIMEIT_END(paint_image_gpu);
+    // TIMEIT_END(paint_image_gpu);
   }
 
   MEM_freeN(data.nodes);
@@ -944,7 +1175,7 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings,
 
 void SCULPT_paint_image_batches_finalize(PaintModeSettings *UNUSED(paint_mode_settings),
                                          Sculpt *UNUSED(sd),
-                                         Object *ob)
+                                         Object *UNUSED(ob))
 {
   if (!SCULPT_use_image_paint_compute()) {
     return;
@@ -953,9 +1184,16 @@ void SCULPT_paint_image_batches_finalize(PaintModeSettings *UNUSED(paint_mode_se
   // TODO(jbakker): record undo steps.
   // TODO(jbakker): download results and update the image data-block.
 
-  SculptSession &ss = *ob->sculpt;
-  GPUSculptPaintData *batches = static_cast<GPUSculptPaintData *>(ss.mode.texture_paint.gpu_data);
-  MEM_delete(batches);
-  ss.mode.texture_paint.gpu_data = nullptr;
+  /* TODO: move this to sculpt tool switch and sculpt session free. */
+  // SCULPT_paint_image_sculpt_data_free(ob->sculpt);
+}
+
+/** Free the GPU texture painting data of the sculpt session (when it has any) and clear it. */
+void SCULPT_paint_image_sculpt_data_free(SculptSession *ss)
+{
+  GPUSculptPaintData *paint_data = static_cast<GPUSculptPaintData *>(
+      ss->mode.texture_paint.gpu_data);
+  if (paint_data == nullptr) {
+    return;
+  }
+
+  MEM_delete(paint_data);
+  ss->mode.texture_paint.gpu_data = nullptr;
+}
 }
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 9b981cddf48..904209e7a98 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -481,6 +481,7 @@ set(GLSL_SRC
   shaders/sculpt_paint/sculpt_paint_image_comp.glsl
   shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
   shaders/sculpt_paint/sculpt_paint_image_lib.glsl
+  shaders/sculpt_paint/sculpt_paint_tile_lib.glsl
 
   GPU_shader_shared_utils.h
 )
diff --git a/source/blender/gpu/GPU_sculpt_shader_shared.h b/source/blender/gpu/GPU_sculpt_shader_shared.h
index 2b1305ad190..a9464c57760 100644
--- a/source/blender/gpu/GPU_sculpt_shader_shared.h
+++ b/source/blender/gpu/GPU_sculpt_shader_shared.h
@@ -67,3 +67,10 @@ struct PaintStepData {
   int _pad0[1];
 };
 BLI_STATIC_ASSERT_ALIGN(PaintStepData, 16);
+
+struct PaintTileData {
+  int tile_number; /* UDIM tile number the sub-tile belongs to. */
+  int layer_id; /* Layer (Z) of the sub-tile texture storing this sub-tile; negative when none. */
+  int2 sub_tile_id; /* 2D index of the sub-tile in the tile (pixel coordinate / sub-tile size). */
+};
+BLI_STATIC_ASSERT_ALIGN(PaintTileData, 16);
diff --git a/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh b/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh
index e42666ac578..1d7ca8dffb5 100644
--- a/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh
+++ b/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh
@@ -7,25 +7,33 @@
 
 #include "gpu_shader_create_info.hh"
 
+GPU_SHADER_CREATE_INFO(sculpt_paint_sub_tiles)
+    .storage_buf(0, Qualifier::READ, "PaintTileData", "paint_tile_buf[]")
+    .push_constant(Type::INT, "paint_tile_buf_len")
+    /* NOTE(review): duplicates the value of TEXTURE_STREAMING_TILE_SIZE (BKE_pbvh_pixels.hh);
+     * presumably both have to be kept in sync -- confirm. */
+    .define("SUB_TILE_SIZE", "1024");
+
 GPU_SHADER_CREATE_INFO(sculpt_paint_image_compute)
     .local_group_size(1, 1, 1)
-    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_img")
-    .storage_buf(0, Qualifier::READ, "PackedPixelRow", "pixel_row_buf[]")
-    .storage_buf(1, Qualifier::READ, "TrianglePaintInput", "paint_input[]")
-    .storage_buf(2, Qualifier::READ, "vec3", "vert_coord_buf[]")
-    .storage_buf(3, Qualifier::READ, "PaintStepData", "paint_step_buf[]")
+    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_3D, "paint_tiles_img")
+    .storage_buf(1, Qualifier::READ, "PackedPixelRow", "pixel_row_buf[]")
+    .storage_buf(2, Qualifier::READ, "TrianglePaintInput", "paint_input[]")
+    .storage_buf(3, Qualifier::READ, "vec3", "vert_coord_buf[]")
+    .storage_buf(4, Qualifier::READ, "PaintStepData", "paint_step_buf[]")
     .uniform_buf(0, "PaintBrushData", "paint_brush_buf")
     .push_constant(Type::INT, "pixel_row_offset")
     .push_constant(Type::IVEC2, "paint_step_range")
+    .push_constant(Type::INT, "udim_tile_number")
     .compute_source("sculpt_paint_image_comp.glsl")
+    .additional_info("sculpt_paint_sub_tiles")
     .typedef_source("GPU_sculpt_shader_shared.h");
 
 GPU_SHADER_CREATE_INFO(sculpt_paint_image_merge_compute)
     .local_group_size(1, 1, 1)
-    .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_paint_img")
-    .image(1, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_img")
+    .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_3D, "paint_tiles_img")
+    .image(1, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "texture_img")
     .compute_source("sculpt_paint_image_merge_comp.glsl")
     .typedef_source("GPU_sculpt_shader_shared.h")
+    .additional_info("sculpt_paint_sub_tiles")
     .do_static_compilation(true);
 
 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl
index 2f3a4e3e1c8..86c90de2d90 100644
--- a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl
+++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl
@@ -1,4 +1,5 @@
 #pragma BLENDER_REQUIRE(sculpt_paint_image_lib.glsl)
+#pragma BLENDER_REQUIRE(sculpt_paint_tile_lib.glsl)
 
 bool SCULPT_brush_test(PaintBrushTestData test_data,
                        PaintStepData step_data,
@@ -19,7 +20,9 @@ void main()
 {
   PackedPixelRow row = pixel_row_buf[gl_GlobalInvocationID.x + pixel_row_offset];
   TrianglePaintInput triangle = paint_input[PIXEL_ROW_PRIM_INDEX(row)];
-  ivec2 image_coord = PIXEL_ROW_START_IMAGE_COORD(row);
+  PaintTileData paint_tile;
+  ivec3 image_coord = paint_tile_coord_from_udim(
+      1001, PIXEL_ROW_START_IMAGE_COORD(row), paint_tile);
 
   uint row_len = PIXEL_ROW_LEN(row);
 
@@ -43,7 +46,7 @@ void main()
       bool test_result = SCULPT_brush_test(paint_brush_buf.test, step_data, pos, distance);
       if (test_result) {
         if (!color_read) {
-          color = imageLoad(out_img, image_coord);
+          color = imageLoad(paint_tiles_img, image_coord);
           color_read = true;
         }
         // TODO: blend with color...
@@ -56,7 +59,7 @@ void main()
       }
     }
     if (color_read) {
-      imageStore(out_img, image_coord, color);
+      imageStore(paint_tiles_img, image_coord, color);
     }
     image_coord.x += 1;
     pos += delta;
diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
index 5248aa87a80..406d83633b0 100644
--- a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
+++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
@@ -1,8 +1,11 @@
+#pragma BLENDER_REQUIRE(sculpt_paint_tile_lib.glsl)
+
 void main()
 {
-  ivec2 coord_in = ivec2(gl_GlobalInvocationID.xy);
-  ivec2 coord_out = coord_in;
-  vec4 paint_color = imageLoad(in_paint_img, coord_in);
+  ivec2 coord_out = ivec2(gl_GlobalInvocationID.xy);
+  PaintTileData paint_tile;
+  ivec3 coord_in = paint_tile_coord_from_udim(1001, coord_out, paint_tile);
+  vec4 paint_color = imageLoad(paint_tiles_img, coord_in);
   paint_color.a = 1.0;
-  imageStore(out_img, coord_out, paint_color);
+  imageStore(texture_img, coord_out, paint_color);
 }
 \ No newline at end of file
diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl
new file mode 100644
index 00000000000..1896535ec20
--- /dev/null
+++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl
@@ -0,0 +1,26 @@
+/* Convert a pixel coordinate inside a tile to the id of the sub-tile that contains it. */
+ivec2 paint_tile_coord_to_sub_tile_id(ivec2 coord)
+{
+  ivec2 sub_tile_size = ivec2(SUB_TILE_SIZE);
+  return coord / sub_tile_size;
+}
+
+/* Find the paint tile that stores the sub-tile containing `coord` of the given tile.
+ * Returns true and fills `r_paint_tile` when found. When nothing is found false is returned and
+ * `r_paint_tile` isn't written. */
+bool paint_tile_search(int tile_number, ivec2 coord, out PaintTileData r_paint_tile)
+{
+  int2 wanted_sub_tile_id = paint_tile_coord_to_sub_tile_id(coord);
+  for (int tile_index = 0; tile_index < paint_tile_buf_len; tile_index++) {
+    if (paint_tile_buf[tile_index].tile_number != tile_number) {
+      continue;
+    }
+    if (paint_tile_buf[tile_index].sub_tile_id != wanted_sub_tile_id) {
+      continue;
+    }
+    r_paint_tile = paint_tile_buf[tile_index];
+    return true;
+  }
+  return false;
+}
+
+/* Convert a pixel coordinate of a tile to a texel coordinate of the sub-tile texture: XY is the
+ * position inside the sub-tile, Z the layer storing the sub-tile.
+ * When the sub-tile isn't available `ivec3(0)` is returned and `r_paint_tile` isn't written. */
+ivec3 paint_tile_coord_from_udim(int tile_number, ivec2 coord, out PaintTileData r_paint_tile)
+{
+  if (!paint_tile_search(tile_number, coord, r_paint_tile)) {
+    return ivec3(0);
+  }
+
+  ivec2 sub_tile_origin = r_paint_tile.sub_tile_id * ivec2(SUB_TILE_SIZE);
+  return ivec3(coord - sub_tile_origin, r_paint_tile.layer_id);
+}
author	Jeroen Bakker <jeroen@blender.org>	2022-10-11 14:45:19 +0300
committer	Jeroen Bakker <jeroen@blender.org>	2022-10-11 14:45:19 +0300
commit	82d9617dab040400a13f142949b55c55a8491ce9 (patch)
tree	c29cd1e51b6a17ea1b7893e410a22d902edfaa63
parent	d6c1b15527b6f938787b50f8cfadc9d9f82a4ab1 (diff)