diff options
Diffstat (limited to 'source')
8 files changed, 156 insertions, 89 deletions
diff --git a/source/blender/blenkernel/BKE_pbvh_pixels.hh b/source/blender/blenkernel/BKE_pbvh_pixels.hh index bff6bee41e4..1eed7aab423 100644 --- a/source/blender/blenkernel/BKE_pbvh_pixels.hh +++ b/source/blender/blenkernel/BKE_pbvh_pixels.hh @@ -106,14 +106,12 @@ struct UDIMTilePixels { Vector<PackedPixelRow> pixel_rows; int64_t gpu_buffer_offset; /* Region of the tile that can be painted on by this node. Size of a subtile is determined by */ - /* TODO: use list of sub_tile_ids to not overcommit texture usage. */ - rcti gpu_sub_tiles; + Vector<int2> gpu_sub_tiles; UDIMTilePixels() { flags.dirty = false; BLI_rcti_init_minmax(&dirty_region); - BLI_rcti_init_minmax(&gpu_sub_tiles); } void mark_dirty(const PackedPixelRow &pixel_row) diff --git a/source/blender/blenkernel/intern/pbvh_pixels.cc b/source/blender/blenkernel/intern/pbvh_pixels.cc index a87a29019b3..38fae311d04 100644 --- a/source/blender/blenkernel/intern/pbvh_pixels.cc +++ b/source/blender/blenkernel/intern/pbvh_pixels.cc @@ -15,6 +15,8 @@ #include "BLI_math.h" #include "BLI_task.h" +#include "PIL_time_utildefines.h" + #include "BKE_image_wrappers.hh" #include "bmesh.h" @@ -79,19 +81,37 @@ void NodeData::build_pixels_gpu_buffer() void UDIMTilePixels::init_gpu_sub_tiles() { - BLI_rcti_init_minmax(&gpu_sub_tiles); + BLI_assert(gpu_sub_tiles.is_empty()); + const int max_sub_tiles = 16; + bool sub_tiles_hit[max_sub_tiles][max_sub_tiles]; + for (int x = 0; x < max_sub_tiles; x++) { + for (int y = 0; y < max_sub_tiles; y++) { + sub_tiles_hit[x][y] = false; + } + } + + int2 max_sub_tile_len(0, 0); for (const PackedPixelRow &elements : pixel_rows) { int2 subtile_from = int2(elements.start_image_coordinate / TEXTURE_STREAMING_TILE_SIZE); int2 coord_to = int2(elements.start_image_coordinate) + int2(elements.num_pixels + 1, 1); int2 subtile_to = int2(coord_to / TEXTURE_STREAMING_TILE_SIZE); + for (int x = subtile_from.x; x < subtile_to.x; x++) { + sub_tiles_hit[x][subtile_from.y] = true; + } + } - BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_from); - BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_to); + for (int x = 0; x < max_sub_tiles; x++) { + for (int y = 0; y < max_sub_tiles; y++) { + if (sub_tiles_hit[x][y]) { + gpu_sub_tiles.append(int2(x, y)); + } + } } } void NodeData::init_gpu_sub_tiles() { + printf("%s\n", __func__); for (UDIMTilePixels &tile : tiles) { tile.init_gpu_sub_tiles(); } diff --git a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc index ddcf91d76e4..88f4aca67d8 100644 --- a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc +++ b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc @@ -553,7 +553,7 @@ static void init_paint_brush(const SculptSession &ss, * - Only tiles that are painted on are loaded in memory, painted on and merged back to the actual * texture. */ -template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { +template<int32_t Size, int32_t Depth = 16> class GPUSubTileTexture { struct Info { struct { bool in_use_stroke : 1; @@ -572,7 +572,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { std::array<int32_t, Depth> layer_lookup_; GPUTexture *gpu_texture_ = nullptr; - GPUStorageBuf *tile_buf_ = nullptr; + GPUStorageBuf *paint_tile_buf_ = nullptr; int64_t tile_buf_size_ = 0; public: @@ -593,17 +593,9 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { gpu_texture_ = nullptr; } - if (tile_buf_) { - GPU_storagebuf_free(tile_buf_); - tile_buf_ = nullptr; - } - } - - void reset_usage() - { - printf("%s\n", __func__); - for (Info &info : infos_) { - info.flags.in_use = false; + if (paint_tile_buf_) { + GPU_storagebuf_free(paint_tile_buf_); + paint_tile_buf_ = nullptr; } } @@ -766,6 +758,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { { BLI_assert(gpu_texture_); float *buffer = nullptr; + bool tiles_updated = false; for (int64_t index : infos_.index_range()) { Info &info = infos_[index]; PaintTileData &tile = paint_tiles_[index]; @@ -781,11 +774,22 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { buffer = static_cast<float *>(MEM_callocN(Size * Size * 4 * sizeof(float), __func__)); } + printf("%s: initializing tile {tile:%d, sub_tile:%d,%d, layer_id:%d}\n", + __func__, + tile.tile_number, + UNPACK2(tile.sub_tile_id), + tile.layer_id); + /* TODO: Copy correct data from ImBuf.*/ - // GPU_texture_update_sub( - // gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1); + GPU_texture_update_sub( + gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1); info.flags.needs_update = false; + tiles_updated = true; + } + + if (tiles_updated) { + GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE); } if (buffer) { @@ -798,32 +802,42 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { return gpu_texture_; } - void ensure_tile_buf() + void ensure_paint_tile_buf() { int64_t needed_size = paint_tiles_.capacity() * sizeof(PaintTileData); /* Reuse previous buffer only when exact size, due to potentional read out of bound errors.*/ - if (tile_buf_ && tile_buf_size_ == needed_size) { + if (paint_tile_buf_ && tile_buf_size_ == needed_size) { return; } - if (tile_buf_) { - GPU_storagebuf_free(tile_buf_); - tile_buf_ = nullptr; + if (paint_tile_buf_) { + GPU_storagebuf_free(paint_tile_buf_); + paint_tile_buf_ = nullptr; + } + paint_tile_buf_ = GPU_storagebuf_create(needed_size); + } + + void update_paint_tile_buf() + { + BLI_assert(paint_tile_buf_); + for (PaintTileData &tile : paint_tiles_) { + tile.in_use_frame = false; } - tile_buf_ = GPU_storagebuf_create(needed_size); + GPU_storagebuf_update(paint_tile_buf_, paint_tiles_.data()); } - void update_tile_buf() + void read_back_paint_tile_buf() { - BLI_assert(tile_buf_); - GPU_storagebuf_update(tile_buf_, paint_tiles_.data()); + BLI_assert(paint_tile_buf_); + // GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + GPU_storagebuf_read(paint_tile_buf_, paint_tiles_.data()); } - GPUStorageBuf *tile_buf_get() + GPUStorageBuf *paint_tile_buf_get() { - BLI_assert(tile_buf_); - return tile_buf_; + BLI_assert(paint_tile_buf_); + return paint_tile_buf_; } int32_t paint_tiles_len() @@ -835,7 +849,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { { GPU_texture_image_bind(gpu_texture_get(), GPU_shader_get_texture_binding(shader, "paint_tiles_img")); - GPU_storagebuf_bind(tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf")); + GPU_storagebuf_bind(paint_tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf")); GPU_shader_uniform_1i(shader, "paint_tile_buf_len", paint_tiles_len()); } @@ -847,6 +861,11 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture { if (!info.flags.in_use_frame) { continue; } + /* + PaintTileData &paint_tile = paint_tiles_[index]; + if (!paint_tile.in_use_frame) { + continue; + }*/ predicate(paint_tiles_[index]); } } @@ -1029,19 +1048,15 @@ static void gpu_painting_image_merge(GPUSculptPaintData &batches, ImageUser &image_user, ImBuf &image_buffer) { + GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); GPUTexture *canvas_tex = BKE_image_get_gpu_texture(&image, &image_user, &image_buffer); GPUShader *shader = SCULPT_shader_paint_image_merge_get(); GPU_shader_bind(shader); batches.tile_texture.bind(shader); GPU_texture_image_bind(canvas_tex, GPU_shader_get_texture_binding(shader, "texture_img")); batches.tile_texture.foreach_in_frame([shader](PaintTileData &paint_tile) { - printf("%s: merging tile stored on layer %d {tile:%d sub_tile:%d,%d} \n", - __func__, - paint_tile.layer_id, - paint_tile.tile_number, - UNPACK2(paint_tile.sub_tile_id)); GPU_shader_uniform_1i(shader, "layer_id", paint_tile.layer_id); - GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE, TEXTURE_STREAMING_TILE_SIZE, 1); + GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE / 32, TEXTURE_STREAMING_TILE_SIZE, 1); }); } @@ -1064,7 +1079,7 @@ static void init_paint_step(const SculptSession &ss, } } -static void dispatch_gpu_painting(TexturePaintingUserData &data) +static void add_paint_step(TexturePaintingUserData &data) { SculptSession &ss = *data.ob->sculpt; @@ -1073,6 +1088,7 @@ static void dispatch_gpu_painting(TexturePaintingUserData &data) PaintStepData paint_step; init_paint_step(ss, *data.brush, paint_step); batches.steps.append(paint_step); + PIL_sleep_ms(1); } /* This should be done based on the frame_selection nodes, otherwise we might be over @@ -1086,11 +1102,8 @@ static void paint_tiles_mark_used(TexturePaintingUserData &data) for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) { NodeData &node_data = BKE_pbvh_pixels_node_data_get(*node); for (UDIMTilePixels &tile : node_data.tiles) { - for (int x = tile.gpu_sub_tiles.xmin; x <= tile.gpu_sub_tiles.xmax; x++) { - for (int y = tile.gpu_sub_tiles.ymin; y <= tile.gpu_sub_tiles.ymax; y++) { - int2 sub_tile_id(x, y); - batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id); - } + for (int2 &sub_tile_id : tile.gpu_sub_tiles) { + batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id); } } } @@ -1120,7 +1133,7 @@ static TileNumbers collect_active_tile_numbers(const TexturePaintingUserData &da return result; } -static void dispatch_gpu_batches(TexturePaintingUserData &data) +static void flush_gpu_batches(TexturePaintingUserData &data) { SculptSession &ss = *data.ob->sculpt; if (!ss.mode.texture_paint.gpu_data) { @@ -1129,6 +1142,7 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data) GPUSculptPaintData &batches = *static_cast<GPUSculptPaintData *>(ss.mode.texture_paint.gpu_data); const int64_t steps_len = batches.steps.size(); + printf("%s: flushing %ld steps\n", __func__, steps_len); int2 paint_step_range(0, steps_len); batches.update_step_buf(); batches.ensure_vert_coord_buf(ss); @@ -1136,8 +1150,8 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data) batches.tile_texture.ensure_gpu_texture(); batches.tile_texture.remove_unused(); batches.tile_texture.assign_layer_ids(); - batches.tile_texture.ensure_tile_buf(); - batches.tile_texture.update_tile_buf(); + batches.tile_texture.ensure_paint_tile_buf(); + batches.tile_texture.update_paint_tile_buf(); Image &image = *data.image_data.image; ImageUser local_image_user = *data.image_data.image_user; @@ -1153,17 +1167,14 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data) TIMEIT_START(upload); batches.tile_texture.update_gpu_texture(tile_number, *image_buffer); - GPU_flush(); TIMEIT_END(upload); GPU_debug_group_begin("Paint tile"); TIMEIT_START(paint_step); gpu_painting_paint_step(data, batches, tile_number, paint_step_range); - GPU_flush(); TIMEIT_END(paint_step); TIMEIT_START(merge); gpu_painting_image_merge(batches, *data.image_data.image, local_image_user, *image_buffer); - GPU_flush(); TIMEIT_END(merge); GPU_debug_group_end(); @@ -1185,6 +1196,17 @@ static void gpu_frame_end(TexturePaintingUserData &data) batches.tile_texture.reset_usage_frame(); } +static bool has_unflushed_batches(Object *ob) +{ + SculptSession &ss = *ob->sculpt; + if (!ss.mode.texture_paint.gpu_data) { + return false; + } + + GPUSculptPaintData &batches = *static_cast<GPUSculptPaintData *>(ss.mode.texture_paint.gpu_data); + return batches.steps.size(); +} + /** \} */ } // namespace blender::ed::sculpt_paint::paint::image @@ -1253,7 +1275,7 @@ void SCULPT_do_paint_brush_image( if (SCULPT_use_image_paint_compute()) { ensure_gpu_buffers(data); update_frame_selection(data); - dispatch_gpu_painting(data); + add_paint_step(data); paint_tiles_mark_used(data); } else { @@ -1278,6 +1300,10 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings, return; } + if (!has_unflushed_batches(ob)) { + return; + } + Brush *brush = BKE_paint_brush(&sd->paint); TexturePaintingUserData data = {nullptr}; data.ob = ob; @@ -1290,7 +1316,7 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings, if (ImageData::init_active_image(ob, &data.image_data, paint_mode_settings)) { TIMEIT_START(paint_image_gpu); GPU_debug_group_begin("SCULPT_paint_brush"); - dispatch_gpu_batches(data); + flush_gpu_batches(data); gpu_frame_end(data); GPU_debug_group_end(); TIMEIT_END(paint_image_gpu); diff --git a/source/blender/gpu/GPU_sculpt_shader_shared.h b/source/blender/gpu/GPU_sculpt_shader_shared.h index a9464c57760..8734ffa93c0 100644 --- a/source/blender/gpu/GPU_sculpt_shader_shared.h +++ b/source/blender/gpu/GPU_sculpt_shader_shared.h @@ -69,8 +69,11 @@ struct PaintStepData { BLI_STATIC_ASSERT_ALIGN(PaintStepData, 16); struct PaintTileData { + int2 sub_tile_id; int tile_number; int layer_id; - int2 sub_tile_id; + int index; + bool1 in_use_frame; + int _pad1[2]; }; BLI_STATIC_ASSERT_ALIGN(PaintTileData, 16); diff --git a/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh b/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh index 1051abc63e9..86003867f02 100644 --- a/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh +++ b/source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh @@ -8,7 +8,7 @@ #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(sculpt_paint_sub_tiles) - .storage_buf(0, Qualifier::READ, "PaintTileData", "paint_tile_buf[]") + .storage_buf(0, Qualifier::READ_WRITE, "PaintTileData", "paint_tile_buf[]") .push_constant(Type::INT, "paint_tile_buf_len") .define("SUB_TILE_SIZE", "1024"); @@ -28,7 +28,7 @@ GPU_SHADER_CREATE_INFO(sculpt_paint_image_compute) .typedef_source("GPU_sculpt_shader_shared.h"); GPU_SHADER_CREATE_INFO(sculpt_paint_image_merge_compute) - .local_group_size(1, 1, 1) + .local_group_size(32, 1, 1) .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_3D, "paint_tiles_img") .image(1, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "texture_img") .push_constant(Type::INT, "layer_id") diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl index 86c90de2d90..f4af1509233 100644 --- a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl +++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl @@ -20,9 +20,6 @@ void main() { PackedPixelRow row = pixel_row_buf[gl_GlobalInvocationID.x + pixel_row_offset]; TrianglePaintInput triangle = paint_input[PIXEL_ROW_PRIM_INDEX(row)]; - PaintTileData paint_tile; - ivec3 image_coord = paint_tile_coord_from_udim( - 1001, PIXEL_ROW_START_IMAGE_COORD(row), paint_tile); uint row_len = PIXEL_ROW_LEN(row); @@ -34,34 +31,42 @@ void main() vec3 delta; SCULPT_get_row_pos_and_delta(co1, co2, co3, triangle, row, pos, delta); - for (int x = 0; x < row_len; x++) { - /* TODO: Do clipping test. */ - vec4 color; - bool color_read = false; + int x = 0; + while (x < row_len) { + PaintTileData paint_tile; + ivec3 image_coord = paint_tile_coord_from_udim( + 1001, PIXEL_ROW_START_IMAGE_COORD(row) + int2(x, 0), paint_tile); + bool in_use = false; + for (; x < row_len && image_coord.x < SUB_TILE_SIZE; x++, image_coord.x++, pos += delta) { + /* TODO: Do clipping test. */ + vec4 color; + bool color_read = false; - for (int step_index = paint_step_range[0]; step_index < paint_step_range[1]; step_index++) { - PaintStepData step_data = paint_step_buf[step_index]; - - float distance; - bool test_result = SCULPT_brush_test(paint_brush_buf.test, step_data, pos, distance); - if (test_result) { - if (!color_read) { - color = imageLoad(paint_tiles_img, image_coord); - color_read = true; + for (int step_index = paint_step_range[0]; step_index < paint_step_range[1]; step_index++) { + PaintStepData step_data = paint_step_buf[step_index]; + float distance; + bool test_result = SCULPT_brush_test(paint_brush_buf.test, step_data, pos, distance); + if (test_result) { + if (!color_read) { + color = imageLoad(paint_tiles_img, image_coord); + color_read = true; + in_use = true; + } + // TODO: blend with color... + float factor = SCULPT_hardness_factor(distance, step_data.hardness, step_data.radius); + float curve_factor = SCULPT_curve_strength(factor, paint_brush_buf.falloff_shape); + vec4 final_paint_color = SCULPT_blend_color( + color, paint_brush_buf.color * curve_factor * step_data.strength); + final_paint_color *= paint_brush_buf.alpha; + color = SCULPT_blend_color(color, final_paint_color); } - // TODO: blend with color... - float factor = SCULPT_hardness_factor(distance, step_data.hardness, step_data.radius); - float curve_factor = SCULPT_curve_strength(factor, paint_brush_buf.falloff_shape); - vec4 final_paint_color = SCULPT_blend_color( - color, paint_brush_buf.color * curve_factor * step_data.strength); - final_paint_color *= paint_brush_buf.alpha; - color = SCULPT_blend_color(color, final_paint_color); + } + if (color_read) { + imageStore(paint_tiles_img, image_coord, color); } } - if (color_read) { - imageStore(paint_tiles_img, image_coord, color); + if (in_use) { + paint_tile_mark_used(paint_tile); } - image_coord.x += 1; - pos += delta; } }
\ No newline at end of file diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl index b3ff3ac3ff3..cec4546f783 100644 --- a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl +++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl @@ -4,6 +4,10 @@ void main() { PaintTileData paint_tile; paint_tile_get_layer(layer_id, paint_tile); + if (!paint_tile.in_use_frame) { + return; + } + ivec3 coord_in = ivec3(gl_GlobalInvocationID.xy, layer_id); vec4 paint_color = imageLoad(paint_tiles_img, coord_in); paint_color.a = 1.0; diff --git a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl index c45664dd97c..88931feecd1 100644 --- a/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl +++ b/source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl @@ -3,28 +3,39 @@ ivec2 paint_tile_coord_to_sub_tile_id(ivec2 coord) return coord / ivec2(SUB_TILE_SIZE); } -bool paint_tile_search(int tile_number, ivec2 coord, out PaintTileData r_paint_tile) +bool paint_tile_search(int tile_number, int2 sub_tile_id, out PaintTileData r_paint_tile) { - int2 sub_tile_id = paint_tile_coord_to_sub_tile_id(coord); for (int i = 0; i < paint_tile_buf_len; i++) { if (paint_tile_buf[i].tile_number == tile_number && paint_tile_buf[i].sub_tile_id == sub_tile_id) { r_paint_tile = paint_tile_buf[i]; + r_paint_tile.index = i; return true; } } return false; } +void paint_tile_mark_used(PaintTileData paint_tile) +{ + paint_tile_buf[paint_tile.index].in_use_frame = true; +} + void paint_tile_get_layer(int layer_id, out PaintTileData r_paint_tile) { r_paint_tile = paint_tile_buf[layer_id]; } -ivec3 paint_tile_coord_from_udim(int tile_number, ivec2 coord, out PaintTileData r_paint_tile) +ivec3 paint_tile_coord_from_paint_tile(ivec2 coord, PaintTileData paint_tile) { - if (paint_tile_search(tile_number, coord, r_paint_tile)) { - return ivec3(coord - r_paint_tile.sub_tile_id * ivec2(SUB_TILE_SIZE), r_paint_tile.layer_id); + return ivec3(coord - paint_tile.sub_tile_id * ivec2(SUB_TILE_SIZE), paint_tile.layer_id); +} + +ivec3 paint_tile_coord_from_udim(int tile_number, ivec2 coord, inout PaintTileData r_paint_tile) +{ + int2 sub_tile_id = paint_tile_coord_to_sub_tile_id(coord); + if (paint_tile_search(tile_number, sub_tile_id, r_paint_tile)) { + return paint_tile_coord_from_paint_tile(coord, r_paint_tile); } return ivec3(0); |