From 3d74a2337c679839265efa16b2bca2a9b7795a00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:36 +0200 Subject: repack: fix trying to use preferred pack in alternates When doing a geometric repack with multi-pack-indices, then we ask git-multi-pack-index(1) to use the largest packfile as the preferred pack. It can happen though that the largest packfile is not part of the main object database, but instead part of an alternate object database. The result is that git-multi-pack-index(1) will not be able to find the preferred pack and print a warning. It then falls back to use the first packfile that the multi-pack-index shall reference. Fix this bug by only considering packfiles as preferred pack that are local. This is the right thing to do given that a multi-pack-index should never reference packfiles borrowed from an alternate. While at it, rename the function `get_largest_active_packfile()` to `get_preferred_pack()` to better document its intent. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'builtin/repack.c') diff --git a/builtin/repack.c b/builtin/repack.c index f649379531..63585ad046 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -444,8 +444,10 @@ static void split_pack_geometry(struct pack_geometry *geometry, int factor) geometry->split = split; } -static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry) +static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) { + uint32_t i; + if (!geometry) { /* * No geometry means either an all-into-one repack (in which @@ -460,7 +462,21 @@ static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry } if (geometry->split == geometry->pack_nr) return NULL; - return geometry->pack[geometry->pack_nr - 1]; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; } static void clear_pack_geometry(struct pack_geometry *geometry) @@ -587,7 +603,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *largest = get_largest_active_pack(geometry); + struct packed_git *preferred = get_preferred_pack(geometry); FILE *in; int ret; @@ -608,9 +624,9 @@ static int write_midx_included_packs(struct string_list *include, if (write_bitmaps) strvec_push(&cmd.args, "--bitmap"); - if (largest) + if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(largest)); + pack_basename(preferred)); if (refs_snapshot) strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); -- cgit v1.2.3 From 51861340f8d7f76a99e0d7265f4417b0a9a6871c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:40 +0200 Subject: repack: fix generating multi-pack-index with only non-local packs When writing the multi-pack-index with geometric repacking we will add all packfiles to the index that are part of the geometric sequence. This can potentially also include packfiles borrowed from an alternate object directory. But given that a multi-pack-index can only ever include packs that are part of the main object database this does not make much sense whatsoever. In the edge case where all packfiles are contained in the alternate object database and the local repository has none itself this bug can cause us to invoke git-multi-pack-index(1) with only non-local packfiles that it ultimately cannot find. This causes it to return an error and thus causes the geometric repack to fail. Fix the code to skip non-local packfiles. Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'builtin/repack.c') diff --git a/builtin/repack.c b/builtin/repack.c index 63585ad046..a591a4ddd6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -570,6 +570,17 @@ static void midx_included_packs(struct string_list *include, for (i = geometry->split; i < geometry->pack_nr; i++) { struct packed_git *p = geometry->pack[i]; + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + strbuf_addstr(&buf, pack_basename(p)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".idx"); -- cgit v1.2.3 From 932c16c04b5e41ee1c76d5640ec3ae67e1900c07 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:08 +0200 Subject: repack: honor `-l` when calculating pack geometry When the user passes `-l` to git-repack(1), then they essentially ask us to only repack objects part of the local object database while ignoring any packfiles part of an alternate object database. And we in fact honor this bit when doing a geometric repack as the resulting packfile will only ever contain local objects. What we're missing though is that we don't take locality of packfiles into account when computing whether the geometric sequence is intact or not. So even though we would only ever roll up local packfiles anyway, we could end up trying to repack because of non-local packfiles. This does not make much sense, and in the worst case it can cause us to try and do the geometric repack over and over again because we're never able to restore the geometric sequence. Fix this bug by honoring whether the user has passed `-l`. If so, we skip adding any non-local packfiles to the pack geometry. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'builtin/repack.c') diff --git a/builtin/repack.c b/builtin/repack.c index a591a4ddd6..165fe1b628 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -321,7 +321,8 @@ static int geometry_cmp(const void *va, const void *vb) } static void init_pack_geometry(struct pack_geometry **geometry_p, - struct string_list *existing_kept_packs) + struct string_list *existing_kept_packs, + const struct pack_objects_args *args) { struct packed_git *p; struct pack_geometry *geometry; @@ -331,6 +332,14 @@ static void init_pack_geometry(struct pack_geometry **geometry_p, geometry = *geometry_p; for (p = get_all_packs(the_repository); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + if (!pack_kept_objects) { /* * Any pack that has its pack_keep bit set will appear @@ -898,7 +907,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (geometric_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing_kept_packs); + init_pack_geometry(&geometry, &existing_kept_packs, &po_args); split_pack_geometry(geometry, geometric_factor); } -- cgit v1.2.3 From d85cd1877777aa92c73868b9e86516d4be04b4a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:12 +0200 Subject: repack: disable writing bitmaps when doing a local repack In order to write a bitmap, we need to have full coverage of all objects that are about to be packed. In the traditional non-multi-pack-index world this meant we need to do a full repack of all objects into a single packfile. But in the new multi-pack-index world we can get away with writing bitmaps when we have multiple packfiles as long as the multi-pack-index covers all objects. This is not always the case though. When asked to perform a repack of local objects, only, then we cannot guarantee to have full coverage of all objects regardless of whether we do a full repack or a repack with a multi-pack-index. The end result is that writing the bitmap will fail in both worlds: $ git multi-pack-index write --stdin-packs --bitmap Signed-off-by: Junio C Hamano --- builtin/repack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'builtin/repack.c') diff --git a/builtin/repack.c b/builtin/repack.c index 165fe1b628..c26f06769b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -885,6 +885,18 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); + if (write_bitmaps && po_args.local && has_alt_odb(the_repository)) { + /* + * When asked to do a local repack, but we have + * packfiles that are inherited from an alternate, then + * we cannot guarantee that the multi-pack-index would + * have full coverage of all objects. We thus disable + * writing bitmaps in that case. + */ + warning(_("disabling bitmap writing, as some objects are not being packed")); + write_bitmaps = 0; + } + if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; -- cgit v1.2.3