Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-01-13 03:09:56 +0300
committerJunio C Hamano <gitster@pobox.com>2024-01-13 03:09:56 +0300
commit0fea6b73f1771469cc423288fcd754d374865400 (patch)
treeead5e63ecb0371557bf920fdd0e150d87219877b /t
parent0ebbaa07d0c19b2ade6fed9660e10dcf834fc922 (diff)
parentba47d88795e12193e7b0fffc5130757a5517a5da (diff)
Merge branch 'tb/multi-pack-verbatim-reuse'
Streaming spans of packfile data used to be done only from a single, primary, pack in a repository with multiple packfiles. It has been extended to allow reuse from other packfiles, too. * tb/multi-pack-verbatim-reuse: (26 commits) t/perf: add performance tests for multi-pack reuse pack-bitmap: enable reuse from all bitmapped packs pack-objects: allow setting `pack.allowPackReuse` to "single" t/test-lib-functions.sh: implement `test_trace2_data` helper pack-objects: add tracing for various packfile metrics pack-bitmap: prepare to mark objects from multiple packs for reuse pack-revindex: implement `midx_pair_to_pack_pos()` pack-revindex: factor out `midx_key_to_pack_pos()` helper midx: implement `midx_preferred_pack()` git-compat-util.h: implement checked size_t to uint32_t conversion pack-objects: include number of packs reused in output pack-objects: prepare `write_reused_pack_verbatim()` for multi-pack reuse pack-objects: prepare `write_reused_pack()` for multi-pack reuse pack-objects: pass `bitmapped_pack`'s to pack-reuse functions pack-objects: keep track of `pack_start` for each reuse pack pack-objects: parameterize pack-reuse routines over a single pack pack-bitmap: return multiple packs via `reuse_partial_packfile_from_bitmap()` pack-bitmap: simplify `reuse_partial_packfile_from_bitmap()` signature ewah: implement `bitmap_is_empty()` pack-bitmap: pass `bitmapped_pack` struct to pack-reuse functions ...
Diffstat (limited to 't')
-rw-r--r--t/helper/test-read-midx.c41
-rwxr-xr-xt/perf/p5332-multi-pack-reuse.sh81
-rwxr-xr-xt/t5319-multi-pack-index.sh35
-rwxr-xr-xt/t5332-multi-pack-reuse.sh203
-rwxr-xr-xt/t6113-rev-list-bitmap-filters.sh2
-rw-r--r--t/test-lib-functions.sh14
6 files changed, 368 insertions, 8 deletions
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index e9a444ddba..4acae41bb9 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -6,6 +6,7 @@
#include "pack-bitmap.h"
#include "packfile.h"
#include "setup.h"
+#include "gettext.h"
static int read_midx_file(const char *object_dir, int show_objects)
{
@@ -79,7 +80,7 @@ static int read_midx_checksum(const char *object_dir)
static int read_midx_preferred_pack(const char *object_dir)
{
struct multi_pack_index *midx = NULL;
- struct bitmap_index *bitmap = NULL;
+ uint32_t preferred_pack;
setup_git_directory();
@@ -87,23 +88,45 @@ static int read_midx_preferred_pack(const char *object_dir)
if (!midx)
return 1;
- bitmap = prepare_bitmap_git(the_repository);
- if (!bitmap)
+ if (midx_preferred_pack(midx, &preferred_pack) < 0) {
+ warning(_("could not determine MIDX preferred pack"));
return 1;
- if (!bitmap_is_midx(bitmap)) {
- free_bitmap_index(bitmap);
+ }
+
+ printf("%s\n", midx->pack_names[preferred_pack]);
+ return 0;
+}
+
+static int read_midx_bitmapped_packs(const char *object_dir)
+{
+ struct multi_pack_index *midx = NULL;
+ struct bitmapped_pack pack;
+ uint32_t i;
+
+ setup_git_directory();
+
+ midx = load_multi_pack_index(object_dir, 1);
+ if (!midx)
return 1;
+
+ for (i = 0; i < midx->num_packs; i++) {
+ if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0)
+ return 1;
+
+ printf("%s\n", pack_basename(pack.p));
+ printf(" bitmap_pos: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_pos);
+ printf(" bitmap_nr: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_nr);
}
- printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]);
- free_bitmap_index(bitmap);
+ close_midx(midx);
+
return 0;
}
int cmd__read_midx(int argc, const char **argv)
{
if (!(argc == 2 || argc == 3))
- usage("read-midx [--show-objects|--checksum|--preferred-pack] <object-dir>");
+ usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] <object-dir>");
if (!strcmp(argv[1], "--show-objects"))
return read_midx_file(argv[2], 1);
@@ -111,5 +134,7 @@ int cmd__read_midx(int argc, const char **argv)
return read_midx_checksum(argv[2]);
else if (!strcmp(argv[1], "--preferred-pack"))
return read_midx_preferred_pack(argv[2]);
+ else if (!strcmp(argv[1], "--bitmap"))
+ return read_midx_bitmapped_packs(argv[2]);
return read_midx_file(argv[1], 0);
}
diff --git a/t/perf/p5332-multi-pack-reuse.sh b/t/perf/p5332-multi-pack-reuse.sh
new file mode 100755
index 0000000000..5c6c575d62
--- /dev/null
+++ b/t/perf/p5332-multi-pack-reuse.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+test_description='tests pack performance with multi-pack reuse'
+
+. ./perf-lib.sh
+. "${TEST_DIRECTORY}/perf/lib-pack.sh"
+
+packdir=.git/objects/pack
+
+test_perf_large_repo
+
+find_pack () {
+ for idx in $packdir/pack-*.idx
+ do
+ if git show-index <$idx | grep -q "$1"
+ then
+ basename $idx
+ fi || return 1
+ done
+}
+
+repack_into_n_chunks () {
+ git repack -adk &&
+
+ test "$1" -eq 1 && return ||
+
+ find $packdir -type f | sort >packs.before &&
+
+ # partition the repository into $1 chunks of consecutive commits, and
+ # then create $1 packs with the objects reachable from each chunk
+ # (excluding any objects reachable from the previous chunks)
+ sz="$(($(git rev-list --count --all) / $1))"
+ for rev in $(git rev-list --all | awk "NR % $sz == 0" | tac)
+ do
+ pack="$(echo "$rev" | git pack-objects --revs \
+ --honor-pack-keep --delta-base-offset $packdir/pack)" &&
+ touch $packdir/pack-$pack.keep || return 1
+ done
+
+ # grab any remaining objects not packed by the previous step(s)
+ git pack-objects --revs --all --honor-pack-keep --delta-base-offset \
+ $packdir/pack &&
+
+ find $packdir -type f | sort >packs.after &&
+
+ # and install the whole thing
+ for f in $(comm -12 packs.before packs.after)
+ do
+ rm -f "$f" || return 1
+ done
+ rm -fr $packdir/*.keep
+}
+
+for nr_packs in 1 10 100
+do
+ test_expect_success "create $nr_packs-pack scenario" '
+ repack_into_n_chunks $nr_packs
+ '
+
+ test_expect_success "setup bitmaps for $nr_packs-pack scenario" '
+ find $packdir -type f -name "*.idx" | sed -e "s/.*\/\(.*\)$/+\1/g" |
+ git multi-pack-index write --stdin-packs --bitmap \
+ --preferred-pack="$(find_pack $(git rev-parse HEAD))"
+ '
+
+ for reuse in single multi
+ do
+ test_perf "clone for $nr_packs-pack scenario ($reuse-pack reuse)" "
+ git for-each-ref --format='%(objectname)' refs/heads refs/tags >in &&
+ git -c pack.allowPackReuse=$reuse pack-objects \
+ --revs --delta-base-offset --use-bitmap-index \
+ --stdout <in >result
+ "
+
+ test_size "clone size for $nr_packs-pack scenario ($reuse-pack reuse)" '
+ wc -c <result
+ '
+ done
+done
+
+test_done
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index c20aafe99a..dd09134db0 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -1171,4 +1171,39 @@ test_expect_success 'reader notices out-of-bounds fanout' '
test_cmp expect err
'
+test_expect_success 'bitmapped packs are stored via the BTMP chunk' '
+ test_when_finished "rm -fr repo" &&
+ git init repo &&
+ (
+ cd repo &&
+
+ for i in 1 2 3 4 5
+ do
+ test_commit "$i" &&
+ git repack -d || return 1
+ done &&
+
+ find $objdir/pack -type f -name "*.idx" | xargs -n 1 basename |
+ sort >packs &&
+
+ git multi-pack-index write --stdin-packs <packs &&
+ test_must_fail test-tool read-midx --bitmap $objdir 2>err &&
+ cat >expect <<-\EOF &&
+ error: MIDX does not contain the BTMP chunk
+ EOF
+ test_cmp expect err &&
+
+ git multi-pack-index write --stdin-packs --bitmap \
+ --preferred-pack="$(head -n1 <packs)" <packs &&
+ test-tool read-midx --bitmap $objdir >actual &&
+ for i in $(test_seq $(wc -l <packs))
+ do
+ sed -ne "${i}s/\.idx$/\.pack/p" packs &&
+ echo " bitmap_pos: $((($i - 1) * 3))" &&
+ echo " bitmap_nr: 3" || return 1
+ done >expect &&
+ test_cmp expect actual
+ )
+'
+
test_done
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
new file mode 100755
index 0000000000..2ba788b042
--- /dev/null
+++ b/t/t5332-multi-pack-reuse.sh
@@ -0,0 +1,203 @@
+#!/bin/sh
+
+test_description='pack-objects multi-pack reuse'
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-bitmap.sh
+
+objdir=.git/objects
+packdir=$objdir/pack
+
+test_pack_reused () {
+ test_trace2_data pack-objects pack-reused "$1"
+}
+
+test_packs_reused () {
+ test_trace2_data pack-objects packs-reused "$1"
+}
+
+
+# pack_position <object> </path/to/pack.idx
+pack_position () {
+ git show-index >objects &&
+ grep "$1" objects | cut -d" " -f1
+}
+
+test_expect_success 'preferred pack is reused for single-pack reuse' '
+ test_config pack.allowPackReuse single &&
+
+ for i in A B
+ do
+ test_commit "$i" &&
+ git repack -d || return 1
+ done &&
+
+ git multi-pack-index write --bitmap &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'enable multi-pack reuse' '
+ git config pack.allowPackReuse multi
+'
+
+test_expect_success 'reuse all objects from subset of bitmapped packs' '
+ test_commit C &&
+ git repack -d &&
+
+ git multi-pack-index write --bitmap &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse C)
+ ^$(git rev-parse A)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs <in >/dev/null &&
+
+ test_pack_reused 6 <trace2.txt &&
+ test_packs_reused 2 <trace2.txt
+'
+
+test_expect_success 'reuse all objects from all packs' '
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all >/dev/null &&
+
+ test_pack_reused 9 <trace2.txt &&
+ test_packs_reused 3 <trace2.txt
+'
+
+test_expect_success 'reuse objects from first pack with middle gap' '
+ for i in D E F
+ do
+ test_commit "$i" || return 1
+ done &&
+
+ # Set "pack.window" to zero to ensure that we do not create any
+ # deltas, which could alter the amount of pack reuse we perform
+ # (if, for e.g., we are not sending one or more bases).
+ D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" &&
+
+ d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" &&
+ e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" &&
+ f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" &&
+
+ # commits F, E, and D, should appear in that order at the
+ # beginning of the pack
+ test $f_pos -lt $e_pos &&
+ test $e_pos -lt $d_pos &&
+
+ # Ensure that the pack we are constructing sorts ahead of any
+ # other packs in lexical/bitmap order by choosing it as the
+ # preferred pack.
+ git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse E)
+ ^$(git rev-parse D)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'reuse objects from middle pack with middle gap' '
+ rm -fr $packdir/multi-pack-index* &&
+
+ # Ensure that the pack we are constructing sort into any
+ # position *but* the first one, by choosing a different pack as
+ # the preferred one.
+ git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse E)
+ ^$(git rev-parse D)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'omit delta with uninteresting base (same pack)' '
+ git repack -adk &&
+
+ test_seq 32 >f &&
+ git add f &&
+ test_tick &&
+ git commit -m "delta" &&
+ delta="$(git rev-parse HEAD)" &&
+
+ test_seq 64 >f &&
+ test_tick &&
+ git commit -a -m "base" &&
+ base="$(git rev-parse HEAD)" &&
+
+ test_commit other &&
+
+ git repack -d &&
+
+ have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" &&
+
+ git multi-pack-index write --bitmap &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse other)
+ ^$base
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ # We can only reuse the 3 objects corresponding to "other" from
+ # the latest pack.
+ #
+ # This is because even though we want "delta", we do not want
+ # "base", meaning that we have to inflate the delta/base-pair
+ # corresponding to the blob in commit "delta", which bypasses
+ # the pack-reuse mechanism.
+ #
+ # The remaining objects from the other pack are similarly not
+ # reused because their objects are on the uninteresting side of
+ # the query.
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'omit delta from uninteresting base (cross pack)' '
+ cat >in <<-EOF &&
+ $(git rev-parse $base)
+ ^$(git rev-parse $delta)
+ EOF
+
+ P="$(git pack-objects --revs $packdir/pack <in)" &&
+
+ git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --all >/dev/null &&
+
+ packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
+ objects_nr="$(git rev-list --count --all --objects)" &&
+
+ test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
+ test_packs_reused $packs_nr <trace2.txt
+'
+
+test_done
diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh
index 86c70521f1..459f0d7412 100755
--- a/t/t6113-rev-list-bitmap-filters.sh
+++ b/t/t6113-rev-list-bitmap-filters.sh
@@ -4,6 +4,8 @@ test_description='rev-list combining bitmaps and filters'
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-bitmap.sh
+TEST_PASSES_SANITIZE_LEAK=true
+
test_expect_success 'set up bitmapped repo' '
# one commit will have bitmaps, the other will not
test_commit one &&
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 5eb57914ab..1213b8398b 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1874,6 +1874,20 @@ test_region () {
return 0
}
+# Check that the given data fragment was included as part of the
+# trace2-format trace on stdin.
+#
+# test_trace2_data <category> <key> <value>
+#
+# For example, to look for trace2_data_intmax("pack-objects", repo,
+# "reused", N) in an invocation of "git pack-objects", run:
+#
+# GIT_TRACE2_EVENT="$(pwd)/trace.txt" git pack-objects ... &&
+# test_trace2_data pack-objects reused N <trace2.txt
+test_trace2_data () {
+ grep -e '"category":"'"$1"'","key":"'"$2"'","value":"'"$3"'"'
+}
+
# Given a GIT_TRACE2_EVENT log over stdin, writes to stdout a list of URLs
# sent to git-remote-https child processes.
test_remote_https_urls() {