diff options
author | Taylor Blau <me@ttaylorr.com> | 2022-11-12 01:42:30 +0300 |
---|---|---|
committer | Taylor Blau <me@ttaylorr.com> | 2022-11-12 01:42:30 +0300 |
commit | 13deb12372624f59af4423cfe9e02bf2e06b1c9e (patch) | |
tree | 4b26a6e54d5c892ea839472a4d5ae321df7c682a | |
parent | 7a892ef35910bcc4ebfe5e9d68aa4da140b43ecf (diff) | |
parent | 91badeba32d31d4dcc695a8888e5b697b4c3d90c (diff) |
Merge branch 'tb/repack-expire-to' into jch
"git repack" learns to send cruft objects out of the way into
packfiles outside the repository.
* tb/repack-expire-to:
builtin/repack.c: implement `--expire-to` for storing pruned objects
builtin/repack.c: write cruft packs to arbitrary locations
builtin/repack.c: pass "cruft_expiration" to `write_cruft_pack`
builtin/repack.c: pass "out" to `prepare_pack_objects`
-rw-r--r-- | Documentation/git-repack.txt | 6 | ||||
-rw-r--r-- | builtin/repack.c | 71 | ||||
-rwxr-xr-x | t/t7700-repack.sh | 121 |
3 files changed, 188 insertions, 10 deletions
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 0bf13893d81..4017157949e 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -74,6 +74,12 @@ to the new separate pack will be written. immediately instead of waiting for the next `git gc` invocation. Only useful with `--cruft -d`. +--expire-to=<dir>:: + Write a cruft pack containing pruned objects (if any) to the + directory `<dir>`. This option is useful for keeping a copy of + any pruned objects in a separate directory as a backup. Only + useful with `--cruft -d`. + -l:: Pass the `--local` option to 'git pack-objects'. See linkgit:git-pack-objects[1]. diff --git a/builtin/repack.c b/builtin/repack.c index 10e23f9ee1f..65eb1b8bd22 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -32,7 +32,6 @@ static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; -static char *cruft_expiration; static const char *const git_repack_usage[] = { N_("git repack [<options>]"), @@ -150,7 +149,8 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name) } static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + const char *out) { strvec_push(&cmd->args, "pack-objects"); if (args->window) @@ -173,7 +173,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, packtmp); + strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; } @@ -241,7 +241,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, FILE *out; struct strbuf line = STRBUF_INIT; - prepare_pack_objects(&cmd, args); + prepare_pack_objects(&cmd, args, packtmp); cmd.in = -1; /* @@ -657,7 +657,9 @@ static void remove_redundant_bitmaps(struct string_list *include, } static int write_cruft_pack(const struct pack_objects_args *args, + const char *destination, const char *pack_prefix, + const char *cruft_expiration, struct string_list *names, struct string_list *existing_packs, struct string_list *existing_kept_packs) @@ -667,8 +669,10 @@ static int write_cruft_pack(const struct pack_objects_args *args, struct string_list_item *item; FILE *in, *out; int ret; + const char *scratch; + int local = skip_prefix(destination, packdir, &scratch); - prepare_pack_objects(&cmd, args); + prepare_pack_objects(&cmd, args, destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -693,6 +697,10 @@ static int write_cruft_pack(const struct pack_objects_args *args, * By the time it is read here, it contains only the pack(s) * that were just written, which is exactly the set of packs we * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) @@ -710,9 +718,14 @@ static int write_cruft_pack(const struct pack_objects_args *args, if (line.len != the_hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); - - item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + /* + * avoid putting packs written outside of the repository in the + * list of names + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = populate_pack_exts(line.buf); + } } fclose(out); @@ -744,6 +757,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) struct pack_objects_args cruft_po_args = {NULL}; int geometric_factor = 0; int write_midx = 0; + const char *cruft_expiration = NULL; + const char *expire_to = NULL; struct option builtin_repack_options[] = { OPT_BIT('a', NULL, &pack_everything, @@ -793,6 +808,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) N_("find a geometric progression with factor <N>")), OPT_BOOL('m', "write-midx", &write_midx, N_("write a multi-pack index of the resulting packs")), + OPT_STRING(0, "expire-to", &expire_to, N_("dir"), + N_("pack prefix to store a pack containing pruned objects")), OPT_END() }; @@ -858,7 +875,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) split_pack_geometry(geometry, geometric_factor); } - prepare_pack_objects(&cmd, &po_args); + prepare_pack_objects(&cmd, &po_args, packtmp); show_progress = !po_args.quiet && isatty(2); @@ -984,11 +1001,45 @@ int cmd_repack(int argc, const char **argv, const char *prefix) cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; - ret = write_cruft_pack(&cruft_po_args, pack_prefix, &names, + ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, + cruft_expiration, &names, &existing_nonkept_packs, &existing_kept_packs); if (ret) return ret; + + if (delete_redundant && expire_to) { + /* + * If `--expire-to` is given with `-d`, it's possible + * that we're about to prune some objects. With cruft + * packs, pruning is implicit: any objects from existing + * packs that weren't picked up by new packs are removed + * when their packs are deleted. + * + * Generate an additional cruft pack, with one twist: + * `names` now includes the name of the cruft pack + * written in the previous step. So the contents of + * _this_ cruft pack exclude everything contained in the + * existing cruft pack (that is, all of the unreachable + * objects which are no older than + * `--cruft-expiration`). + * + * To make this work, cruft_expiration must become NULL + * so that this cruft pack doesn't actually prune any + * objects. If it were non-NULL, this call would always + * generate an empty pack (since every object not in the + * cruft pack generated above will have an mtime older + * than the expiration). + */ + ret = write_cruft_pack(&cruft_po_args, expire_to, + pack_prefix, + NULL, + &names, + &existing_nonkept_packs, + &existing_kept_packs); + if (ret) + return ret; + } } string_list_sort(&names); diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 5be483bf887..c630e0d52da 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -543,4 +543,125 @@ test_expect_success '-n overrides repack.updateServerInfo=true' ' test_server_info_missing ' +test_expect_success '--expire-to stores pruned objects (now)' ' + git init expire-to-now && + ( + cd expire-to-now && + + git branch -M main && + + test_commit base && + + git checkout -b cruft && + test_commit --no-tag cruft && + + git rev-list --objects --no-object-names main..cruft >moved.raw && + sort moved.raw >moved.want && + + git rev-list --all --objects --no-object-names >expect.raw && + sort expect.raw >expect && + + git checkout main && + git branch -D cruft && + git reflog expire --all --expire=all && + + git init --bare expired.git && + git repack -d \ + --cruft --cruft-expiration="now" \ + --expire-to="expired.git/objects/pack/pack" && + + expired="$(ls expired.git/objects/pack/pack-*.idx)" && + test_path_is_file "${expired%.idx}.mtimes" && + + # Since the `--cruft-expiration` is "now", the effective + # behavior is to move _all_ unreachable objects out to + # the location in `--expire-to`. + git show-index <$expired >expired.raw && + cut -d" " -f2 expired.raw | sort >expired.objects && + git rev-list --all --objects --no-object-names \ + >remaining.objects && + + # ...in other words, the combined contents of this + # repository and expired.git should be the same as the + # set of objects we started with. + cat expired.objects remaining.objects | sort >actual && + test_cmp expect actual && + + # The "moved" objects (i.e., those in expired.git) + # should be the same as the cruft objects which were + # expired in the previous step. + test_cmp moved.want expired.objects + ) +' + +test_expect_success '--expire-to stores pruned objects (5.minutes.ago)' ' + git init expire-to-5.minutes.ago && + ( + cd expire-to-5.minutes.ago && + + git branch -M main && + + test_commit base && + + # Create two classes of unreachable objects, one which + # is older than 5 minutes (stale), and another which is + # newer (recent). + for kind in stale recent + do + git checkout -b $kind main && + test_commit --no-tag $kind || return 1 + done && + + git rev-list --objects --no-object-names main..stale >in && + stale="$(git pack-objects $objdir/pack/pack <in)" && + mtime="$(test-tool chmtime --get =-600 $objdir/pack/pack-$stale.pack)" && + + # expect holds the set of objects we expect to find in + # this repository after repacking + git rev-list --objects --no-object-names recent >expect.raw && + sort expect.raw >expect && + + # moved.want holds the set of objects we expect to find + # in expired.git + git rev-list --objects --no-object-names main..stale >out && + sort out >moved.want && + + git checkout main && + git branch -D stale recent && + git reflog expire --all --expire=all && + git prune-packed && + + git init --bare expired.git && + git repack -d \ + --cruft --cruft-expiration=5.minutes.ago \ + --expire-to="expired.git/objects/pack/pack" && + + # Some of the remaining objects in this repository are + # unreachable, so use `cat-file --batch-all-objects` + # instead of `rev-list` to get their names + git cat-file --batch-all-objects --batch-check="%(objectname)" \ + >remaining.objects && + sort remaining.objects >actual && + test_cmp expect actual && + + ( + cd expired.git && + + expired="$(ls objects/pack/pack-*.mtimes)" && + test-tool pack-mtimes $(basename $expired) >out && + cut -d" " -f1 out | sort >../moved.got && + + # Ensure that there are as many objects with the + # expected mtime as were moved to expired.git. + # + # In other words, ensure that the recorded + # mtimes of any moved objects was written + # correctly. + grep " $mtime$" out >matching && + test_line_count = $(wc -l <../moved.want) matching + ) && + test_cmp moved.want moved.got + ) +' + test_done |