diff options
author | Junio C Hamano <gitster@pobox.com> | 2024-01-13 03:09:56 +0300 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2024-01-13 03:09:56 +0300 |
commit | 0fea6b73f1771469cc423288fcd754d374865400 (patch) | |
tree | ead5e63ecb0371557bf920fdd0e150d87219877b /t | |
parent | 0ebbaa07d0c19b2ade6fed9660e10dcf834fc922 (diff) | |
parent | ba47d88795e12193e7b0fffc5130757a5517a5da (diff) |
Merge branch 'tb/multi-pack-verbatim-reuse'
Streaming spans of packfile data used to be done only from a
single, primary, pack in a repository with multiple packfiles. It
has been extended to allow reuse from other packfiles, too.
* tb/multi-pack-verbatim-reuse: (26 commits)
t/perf: add performance tests for multi-pack reuse
pack-bitmap: enable reuse from all bitmapped packs
pack-objects: allow setting `pack.allowPackReuse` to "single"
t/test-lib-functions.sh: implement `test_trace2_data` helper
pack-objects: add tracing for various packfile metrics
pack-bitmap: prepare to mark objects from multiple packs for reuse
pack-revindex: implement `midx_pair_to_pack_pos()`
pack-revindex: factor out `midx_key_to_pack_pos()` helper
midx: implement `midx_preferred_pack()`
git-compat-util.h: implement checked size_t to uint32_t conversion
pack-objects: include number of packs reused in output
pack-objects: prepare `write_reused_pack_verbatim()` for multi-pack reuse
pack-objects: prepare `write_reused_pack()` for multi-pack reuse
pack-objects: pass `bitmapped_pack`'s to pack-reuse functions
pack-objects: keep track of `pack_start` for each reuse pack
pack-objects: parameterize pack-reuse routines over a single pack
pack-bitmap: return multiple packs via `reuse_partial_packfile_from_bitmap()`
pack-bitmap: simplify `reuse_partial_packfile_from_bitmap()` signature
ewah: implement `bitmap_is_empty()`
pack-bitmap: pass `bitmapped_pack` struct to pack-reuse functions
...
Diffstat (limited to 't')
-rw-r--r-- | t/helper/test-read-midx.c | 41 | ||||
-rwxr-xr-x | t/perf/p5332-multi-pack-reuse.sh | 81 | ||||
-rwxr-xr-x | t/t5319-multi-pack-index.sh | 35 | ||||
-rwxr-xr-x | t/t5332-multi-pack-reuse.sh | 203 | ||||
-rwxr-xr-x | t/t6113-rev-list-bitmap-filters.sh | 2 | ||||
-rw-r--r-- | t/test-lib-functions.sh | 14 |
6 files changed, 368 insertions, 8 deletions
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index e9a444ddba..4acae41bb9 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -6,6 +6,7 @@ #include "pack-bitmap.h" #include "packfile.h" #include "setup.h" +#include "gettext.h" static int read_midx_file(const char *object_dir, int show_objects) { @@ -79,7 +80,7 @@ static int read_midx_checksum(const char *object_dir) static int read_midx_preferred_pack(const char *object_dir) { struct multi_pack_index *midx = NULL; - struct bitmap_index *bitmap = NULL; + uint32_t preferred_pack; setup_git_directory(); @@ -87,23 +88,45 @@ static int read_midx_preferred_pack(const char *object_dir) if (!midx) return 1; - bitmap = prepare_bitmap_git(the_repository); - if (!bitmap) + if (midx_preferred_pack(midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); return 1; - if (!bitmap_is_midx(bitmap)) { - free_bitmap_index(bitmap); + } + + printf("%s\n", midx->pack_names[preferred_pack]); + return 0; +} + +static int read_midx_bitmapped_packs(const char *object_dir) +{ + struct multi_pack_index *midx = NULL; + struct bitmapped_pack pack; + uint32_t i; + + setup_git_directory(); + + midx = load_multi_pack_index(object_dir, 1); + if (!midx) return 1; + + for (i = 0; i < midx->num_packs; i++) { + if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0) + return 1; + + printf("%s\n", pack_basename(pack.p)); + printf(" bitmap_pos: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_pos); + printf(" bitmap_nr: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_nr); } - printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]); - free_bitmap_index(bitmap); + close_midx(midx); + return 0; } int cmd__read_midx(int argc, const char **argv) { if (!(argc == 2 || argc == 3)) - usage("read-midx [--show-objects|--checksum|--preferred-pack] <object-dir>"); + usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] <object-dir>"); if (!strcmp(argv[1], "--show-objects")) return read_midx_file(argv[2], 1); @@ -111,5 +134,7 @@ int cmd__read_midx(int argc, const char **argv) return read_midx_checksum(argv[2]); else if (!strcmp(argv[1], "--preferred-pack")) return read_midx_preferred_pack(argv[2]); + else if (!strcmp(argv[1], "--bitmap")) + return read_midx_bitmapped_packs(argv[2]); return read_midx_file(argv[1], 0); } diff --git a/t/perf/p5332-multi-pack-reuse.sh b/t/perf/p5332-multi-pack-reuse.sh new file mode 100755 index 0000000000..5c6c575d62 --- /dev/null +++ b/t/perf/p5332-multi-pack-reuse.sh @@ -0,0 +1,81 @@ +#!/bin/sh + +test_description='tests pack performance with multi-pack reuse' + +. ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-pack.sh" + +packdir=.git/objects/pack + +test_perf_large_repo + +find_pack () { + for idx in $packdir/pack-*.idx + do + if git show-index <$idx | grep -q "$1" + then + basename $idx + fi || return 1 + done +} + +repack_into_n_chunks () { + git repack -adk && + + test "$1" -eq 1 && return || + + find $packdir -type f | sort >packs.before && + + # partition the repository into $1 chunks of consecutive commits, and + # then create $1 packs with the objects reachable from each chunk + # (excluding any objects reachable from the previous chunks) + sz="$(($(git rev-list --count --all) / $1))" + for rev in $(git rev-list --all | awk "NR % $sz == 0" | tac) + do + pack="$(echo "$rev" | git pack-objects --revs \ + --honor-pack-keep --delta-base-offset $packdir/pack)" && + touch $packdir/pack-$pack.keep || return 1 + done + + # grab any remaining objects not packed by the previous step(s) + git pack-objects --revs --all --honor-pack-keep --delta-base-offset \ + $packdir/pack && + + find $packdir -type f | sort >packs.after && + + # and install the whole thing + for f in $(comm -12 packs.before packs.after) + do + rm -f "$f" || return 1 + done + rm -fr $packdir/*.keep +} + +for nr_packs in 1 10 100 +do + test_expect_success "create $nr_packs-pack scenario" ' + repack_into_n_chunks $nr_packs + ' + + test_expect_success "setup bitmaps for $nr_packs-pack scenario" ' + find $packdir -type f -name "*.idx" | sed -e "s/.*\/\(.*\)$/+\1/g" | + git multi-pack-index write --stdin-packs --bitmap \ + --preferred-pack="$(find_pack $(git rev-parse HEAD))" + ' + + for reuse in single multi + do + test_perf "clone for $nr_packs-pack scenario ($reuse-pack reuse)" " + git for-each-ref --format='%(objectname)' refs/heads refs/tags >in && + git -c pack.allowPackReuse=$reuse pack-objects \ + --revs --delta-base-offset --use-bitmap-index \ + --stdout <in >result + " + + test_size "clone size for $nr_packs-pack scenario ($reuse-pack reuse)" ' + wc -c <result + ' + done +done + +test_done diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c20aafe99a..dd09134db0 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1171,4 +1171,39 @@ test_expect_success 'reader notices out-of-bounds fanout' ' test_cmp expect err ' +test_expect_success 'bitmapped packs are stored via the BTMP chunk' ' + test_when_finished "rm -fr repo" && + git init repo && + ( + cd repo && + + for i in 1 2 3 4 5 + do + test_commit "$i" && + git repack -d || return 1 + done && + + find $objdir/pack -type f -name "*.idx" | xargs -n 1 basename | + sort >packs && + + git multi-pack-index write --stdin-packs <packs && + test_must_fail test-tool read-midx --bitmap $objdir 2>err && + cat >expect <<-\EOF && + error: MIDX does not contain the BTMP chunk + EOF + test_cmp expect err && + + git multi-pack-index write --stdin-packs --bitmap \ + --preferred-pack="$(head -n1 <packs)" <packs && + test-tool read-midx --bitmap $objdir >actual && + for i in $(test_seq $(wc -l <packs)) + do + sed -ne "${i}s/\.idx$/\.pack/p" packs && + echo " bitmap_pos: $((($i - 1) * 3))" && + echo " bitmap_nr: 3" || return 1 + done >expect && + test_cmp expect actual + ) +' + test_done diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh new file mode 100755 index 0000000000..2ba788b042 --- /dev/null +++ b/t/t5332-multi-pack-reuse.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='pack-objects multi-pack reuse' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-bitmap.sh + +objdir=.git/objects +packdir=$objdir/pack + +test_pack_reused () { + test_trace2_data pack-objects pack-reused "$1" +} + +test_packs_reused () { + test_trace2_data pack-objects packs-reused "$1" +} + + +# pack_position <object> </path/to/pack.idx +pack_position () { + git show-index >objects && + grep "$1" objects | cut -d" " -f1 +} + +test_expect_success 'preferred pack is reused for single-pack reuse' ' + test_config pack.allowPackReuse single && + + for i in A B + do + test_commit "$i" && + git repack -d || return 1 + done && + + git multi-pack-index write --bitmap && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'enable multi-pack reuse' ' + git config pack.allowPackReuse multi +' + +test_expect_success 'reuse all objects from subset of bitmapped packs' ' + test_commit C && + git repack -d && + + git multi-pack-index write --bitmap && + + cat >in <<-EOF && + $(git rev-parse C) + ^$(git rev-parse A) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs <in >/dev/null && + + test_pack_reused 6 <trace2.txt && + test_packs_reused 2 <trace2.txt +' + +test_expect_success 'reuse all objects from all packs' ' + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 9 <trace2.txt && + test_packs_reused 3 <trace2.txt +' + +test_expect_success 'reuse objects from first pack with middle gap' ' + for i in D E F + do + test_commit "$i" || return 1 + done && + + # Set "pack.window" to zero to ensure that we do not create any + # deltas, which could alter the amount of pack reuse we perform + # (if, for e.g., we are not sending one or more bases). + D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" && + + d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" && + e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" && + f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" && + + # commits F, E, and D, should appear in that order at the + # beginning of the pack + test $f_pos -lt $e_pos && + test $e_pos -lt $d_pos && + + # Ensure that the pack we are constructing sorts ahead of any + # other packs in lexical/bitmap order by choosing it as the + # preferred pack. + git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" && + + cat >in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'reuse objects from middle pack with middle gap' ' + rm -fr $packdir/multi-pack-index* && + + # Ensure that the pack we are constructing sort into any + # position *but* the first one, by choosing a different pack as + # the preferred one. + git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" && + + cat >in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'omit delta with uninteresting base (same pack)' ' + git repack -adk && + + test_seq 32 >f && + git add f && + test_tick && + git commit -m "delta" && + delta="$(git rev-parse HEAD)" && + + test_seq 64 >f && + test_tick && + git commit -a -m "base" && + base="$(git rev-parse HEAD)" && + + test_commit other && + + git repack -d && + + have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" && + + git multi-pack-index write --bitmap && + + cat >in <<-EOF && + $(git rev-parse other) + ^$base + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + # We can only reuse the 3 objects corresponding to "other" from + # the latest pack. + # + # This is because even though we want "delta", we do not want + # "base", meaning that we have to inflate the delta/base-pair + # corresponding to the blob in commit "delta", which bypasses + # the pack-reuse mechanism. + # + # The remaining objects from the other pack are similarly not + # reused because their objects are on the uninteresting side of + # the query. + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'omit delta from uninteresting base (cross pack)' ' + cat >in <<-EOF && + $(git rev-parse $base) + ^$(git rev-parse $delta) + EOF + + P="$(git pack-objects --revs $packdir/pack <in)" && + + git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --all >/dev/null && + + packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" && + objects_nr="$(git rev-list --count --all --objects)" && + + test_pack_reused $(($objects_nr - 1)) <trace2.txt && + test_packs_reused $packs_nr <trace2.txt +' + +test_done diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh index 86c70521f1..459f0d7412 100755 --- a/t/t6113-rev-list-bitmap-filters.sh +++ b/t/t6113-rev-list-bitmap-filters.sh @@ -4,6 +4,8 @@ test_description='rev-list combining bitmaps and filters' . ./test-lib.sh . "$TEST_DIRECTORY"/lib-bitmap.sh +TEST_PASSES_SANITIZE_LEAK=true + test_expect_success 'set up bitmapped repo' ' # one commit will have bitmaps, the other will not test_commit one && diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 5eb57914ab..1213b8398b 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1874,6 +1874,20 @@ test_region () { return 0 } +# Check that the given data fragment was included as part of the +# trace2-format trace on stdin. +# +# test_trace2_data <category> <key> <value> +# +# For example, to look for trace2_data_intmax("pack-objects", repo, +# "reused", N) in an invocation of "git pack-objects", run: +# +# GIT_TRACE2_EVENT="$(pwd)/trace.txt" git pack-objects ... && +# test_trace2_data pack-objects reused N <trace2.txt +test_trace2_data () { + grep -e '"category":"'"$1"'","key":"'"$2"'","value":"'"$3"'"' +} + # Given a GIT_TRACE2_EVENT log over stdin, writes to stdout a list of URLs # sent to git-remote-https child processes. test_remote_https_urls() { |