From 40d18ff8c6b6977eaf8450dc4eeb866ece5298fc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 18 Dec 2019 12:25:39 +0100 Subject: pack-bitmap: introduce bitmap_walk_contains() We will use this helper function in a following commit to tell us if an object is packed. Signed-off-by: Jeff King Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- pack-bitmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pack-bitmap.h') diff --git a/pack-bitmap.h b/pack-bitmap.h index 00de3ec8e4..5425767f0f 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -3,6 +3,7 @@ #include "ewah/ewok.h" #include "khash.h" +#include "pack.h" #include "pack-objects.h" struct commit; @@ -53,6 +54,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *, int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); +int bitmap_walk_contains(struct bitmap_index *, + struct bitmap *bitmap, const struct object_id *oid); /* * After a traversal has been performed by prepare_bitmap_walk(), this can be -- cgit v1.2.3 From bb514de356cfcfd314f7ac7ae1acfeede3fa4b1f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 18 Dec 2019 12:25:45 +0100 Subject: pack-objects: improve partial packfile reuse The old code to reuse deltas from an existing packfile just tried to dump a whole segment of the pack verbatim. That's faster than the traditional way of actually adding objects to the packing list, but it didn't kick in very often. This new code is really going for a middle ground: do _some_ per-object work, but way less than we'd traditionally do. The general strategy of the new code is to make a bitmap of objects from the packfile we'll include, and then iterate over it, writing out each object exactly as it is in our on-disk pack, but _not_ adding it to our packlist (which costs memory, and increases the search space for deltas). One complication is that if we're omitting some objects, we can't set a delta against a base that we're not sending. So we have to check each object in try_partial_reuse() to make sure we have its delta. About performance, in the worst case we might have interleaved objects that we are sending or not sending, and we'd have as many chunks as objects. But in practice we send big chunks. For instance, packing torvalds/linux on GitHub servers now reused 6.5M objects, but only needed ~50k chunks. Helped-by: Jonathan Tan Signed-off-by: Jeff King Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- pack-bitmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'pack-bitmap.h') diff --git a/pack-bitmap.h b/pack-bitmap.h index 5425767f0f..91bdbfc837 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -50,7 +50,8 @@ void test_bitmap_walk(struct rev_info *revs); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs); int reuse_partial_packfile_from_bitmap(struct bitmap_index *, struct packed_git **packfile, - uint32_t *entries, off_t *up_to); + uint32_t *entries, + struct bitmap **reuse_out); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); -- cgit v1.2.3