diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-04-25 13:37:01 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-04-25 15:07:50 +0300 |
commit | 9c700ea473d781eea50eab685d643d95e9c4ffee (patch) | |
tree | 768b9509126d2e15f2f14ffd65dd92a2db86b80d | |
parent | b97d63a6b353018515f04dc424137145b5e7db62 (diff) |
Makefile: Drop bundled Git v2.33.1.gl3 [pks-drop-bundled-git-v2.33.1]
We have finished the migration to bundled Git v2.35.1.gl1 in v14.10. Due
to concerns with zero-downtime upgrades we couldn't yet remove the old
version though. But now that we have waited for a release we can finally
remove the old version.
Remove the infrastructure to build and install bundled Git v2.33.1.gl3.
Changelog: removed
20 files changed, 2 insertions, 2001 deletions
@@ -300,15 +300,13 @@ install: build .PHONY: build-bundled-git ## Build bundled Git binaries. -build-bundled-git: build-bundled-git-v2.33.1.gl2 build-bundled-git-v2.35.1.gl1 -build-bundled-git-v2.33.1.gl2: $(patsubst %,${BUILD_DIR}/bin/gitaly-%,${GIT_EXECUTABLES}) +build-bundled-git: build-bundled-git-v2.35.1.gl1 build-bundled-git-v2.35.1.gl1: $(patsubst %,${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1,${GIT_EXECUTABLES}) .PHONY: install-bundled-git ## Install bundled Git binaries. The target directory can be modified by ## setting PREFIX and DESTDIR. -install-bundled-git: install-bundled-git-v2.33.1.gl2 install-bundled-git-v2.35.1.gl1 -install-bundled-git-v2.33.1.gl2: $(patsubst %,${INSTALL_DEST_DIR}/gitaly-%,${GIT_EXECUTABLES}) +install-bundled-git: install-bundled-git-v2.35.1.gl1 install-bundled-git-v2.35.1.gl1: $(patsubst %,${INSTALL_DEST_DIR}/gitaly-%-v2.35.1.gl1,${GIT_EXECUTABLES}) ifdef WITH_BUNDLED_GIT @@ -569,12 +567,6 @@ ${GIT_PREFIX}/bin/git: ${DEPENDENCY_DIR}/git-${GIT_VERSION}.${GIT_EXTRA_VERSION} ${Q}env -u PROFILE -u MAKEFLAGS -u GIT_VERSION ${MAKE} -C "$(<D)" -j$(shell nproc) prefix=${GIT_PREFIX} ${GIT_BUILD_OPTIONS} install ${Q}touch $@ -${BUILD_DIR}/bin/gitaly-%: override GIT_PATCHES := $(sort $(wildcard ${SOURCE_DIR}/_support/git-patches/v2.33.1.gl3/*)) -${BUILD_DIR}/bin/gitaly-%: override GIT_VERSION = v2.33.1 -${BUILD_DIR}/bin/gitaly-%: override GIT_EXTRA_VERSION = gl3 -${BUILD_DIR}/bin/gitaly-%: ${DEPENDENCY_DIR}/git-v2.33.1.gl3/% | ${BUILD_DIR}/bin - ${Q}install $< $@ - ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES := $(sort $(wildcard ${SOURCE_DIR}/_support/git-patches/v2.35.1.gl1/*)) ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_VERSION = v2.35.1 ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_EXTRA_VERSION = gl1 diff --git a/_support/git-patches/v2.33.1.gl3/0001-fetch-pack-speed-up-loading-of-refs-via-commit-graph.patch b/_support/git-patches/v2.33.1.gl3/0001-fetch-pack-speed-up-loading-of-refs-via-commit-graph.patch deleted 
file mode 100644 index 23d0d6a33..000000000 --- a/_support/git-patches/v2.33.1.gl3/0001-fetch-pack-speed-up-loading-of-refs-via-commit-graph.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 08519b8ab6f395cffbcd5e530bfba6aaf64241a2 Mon Sep 17 00:00:00 2001 -Message-Id: <08519b8ab6f395cffbcd5e530bfba6aaf64241a2.1628144240.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 4 Aug 2021 15:04:25 +0200 -Subject: [PATCH] fetch-pack: speed up loading of refs via commit graph -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When doing reference negotiation, git-fetch-pack(1) is loading all refs -from disk in order to determine which commits it has in common with the -remote repository. This can be quite expensive in repositories with many -references though: in a real-world repository with around 2.2 million -refs, fetching a single commit by its ID takes around 44 seconds. - -Dominating the loading time is decompression and parsing of the objects -which are referenced by commits. Given the fact that we only care about -commits (or tags which can be peeled to one) in this context, there is -thus an easy performance win by switching the parsing logic to make use -of the commit graph in case we have one available. Like this, we avoid -hitting the object database to parse these commits but instead only load -them from the commit-graph. 
This results in a significant performance -boost when executing git-fetch in said repository with 2.2 million refs: - - Benchmark #1: HEAD~: git fetch $remote $commit - Time (mean ± σ): 44.168 s ± 0.341 s [User: 42.985 s, System: 1.106 s] - Range (min … max): 43.565 s … 44.577 s 10 runs - - Benchmark #2: HEAD: git fetch $remote $commit - Time (mean ± σ): 19.498 s ± 0.724 s [User: 18.751 s, System: 0.690 s] - Range (min … max): 18.629 s … 20.454 s 10 runs - - Summary - 'HEAD: git fetch $remote $commit' ran - 2.27 ± 0.09 times faster than 'HEAD~: git fetch $remote $commit' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> ---- - fetch-pack.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/fetch-pack.c b/fetch-pack.c -index b0c7be717c..0bf7ed7e47 100644 ---- a/fetch-pack.c -+++ b/fetch-pack.c -@@ -137,8 +137,14 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid, - break; - } - } -- if (type == OBJ_COMMIT) -- return (struct commit *) parse_object(the_repository, oid); -+ -+ if (type == OBJ_COMMIT) { -+ struct commit *commit = lookup_commit(the_repository, oid); -+ if (!commit || repo_parse_commit(the_repository, commit)) -+ return NULL; -+ return commit; -+ } -+ - return NULL; - } - --- -2.32.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0002-revision-separate-walk-and-unsorted-flags.patch b/_support/git-patches/v2.33.1.gl3/0002-revision-separate-walk-and-unsorted-flags.patch deleted file mode 100644 index 3cf006650..000000000 --- a/_support/git-patches/v2.33.1.gl3/0002-revision-separate-walk-and-unsorted-flags.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 29ef1f27fed21b5b7d3c996a01f1364e7e841917 Mon Sep 17 00:00:00 2001 -Message-Id: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Thu, 5 Aug 2021 13:25:24 +0200 -Subject: [PATCH 2/6] revision: separate walk and unsorted flags - -The `--no-walk` flag supports two modes: either it sorts the 
revisions -given as input input or it doesn't. This is reflected in a single -`no_walk` flag, which reflects one of the three states "walk", "don't -walk but without sorting" and "don't walk but with sorting". - -Split up the flag into two separate bits, one indicating whether we -should walk or not and one indicating whether the input should be sorted -or not. This will allow us to more easily introduce a new flag -`--unsorted-input`, which only impacts the sorting bit. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/log.c | 2 +- - builtin/revert.c | 3 ++- - revision.c | 9 +++++---- - revision.h | 7 ++----- - 4 files changed, 10 insertions(+), 11 deletions(-) - -diff --git a/builtin/log.c b/builtin/log.c -index 3d7717ba5c..f75d87e8d7 100644 ---- a/builtin/log.c -+++ b/builtin/log.c -@@ -637,7 +637,7 @@ int cmd_show(int argc, const char **argv, const char *prefix) - repo_init_revisions(the_repository, &rev, prefix); - rev.diff = 1; - rev.always_show_header = 1; -- rev.no_walk = REVISION_WALK_NO_WALK_SORTED; -+ rev.no_walk = 1; - rev.diffopt.stat_width = -1; /* Scale to real terminal size */ - - memset(&opt, 0, sizeof(opt)); -diff --git a/builtin/revert.c b/builtin/revert.c -index 237f2f18d4..2e13660e4b 100644 ---- a/builtin/revert.c -+++ b/builtin/revert.c -@@ -191,7 +191,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts) - struct setup_revision_opt s_r_opt; - opts->revs = xmalloc(sizeof(*opts->revs)); - repo_init_revisions(the_repository, opts->revs, NULL); -- opts->revs->no_walk = REVISION_WALK_NO_WALK_UNSORTED; -+ opts->revs->no_walk = 1; -+ opts->revs->unsorted_input = 1; - if (argc < 2) - usage_with_options(usage_str, options); - if (!strcmp(argv[1], "-")) -diff --git a/revision.c b/revision.c -index cddd0542a6..86bbcd10d2 100644 ---- a/revision.c -+++ b/revision.c -@@ -2651,16 +2651,17 @@ static int handle_revision_pseudo_opt(const char *submodule, - } else if 
(!strcmp(arg, "--not")) { - *flags ^= UNINTERESTING | BOTTOM; - } else if (!strcmp(arg, "--no-walk")) { -- revs->no_walk = REVISION_WALK_NO_WALK_SORTED; -+ revs->no_walk = 1; - } else if (skip_prefix(arg, "--no-walk=", &optarg)) { - /* - * Detached form ("--no-walk X" as opposed to "--no-walk=X") - * not allowed, since the argument is optional. - */ -+ revs->no_walk = 1; - if (!strcmp(optarg, "sorted")) -- revs->no_walk = REVISION_WALK_NO_WALK_SORTED; -+ revs->unsorted_input = 0; - else if (!strcmp(optarg, "unsorted")) -- revs->no_walk = REVISION_WALK_NO_WALK_UNSORTED; -+ revs->unsorted_input = 1; - else - return error("invalid argument to --no-walk"); - } else if (!strcmp(arg, "--do-walk")) { -@@ -3584,7 +3585,7 @@ int prepare_revision_walk(struct rev_info *revs) - - if (!revs->reflog_info) - prepare_to_use_bloom_filter(revs); -- if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) -+ if (!revs->unsorted_input) - commit_list_sort_by_date(&revs->commits); - if (revs->no_walk) - return 0; -diff --git a/revision.h b/revision.h -index fbb068da9f..0c65a760ee 100644 ---- a/revision.h -+++ b/revision.h -@@ -79,10 +79,6 @@ struct rev_cmdline_info { - } *rev; - }; - --#define REVISION_WALK_WALK 0 --#define REVISION_WALK_NO_WALK_SORTED 1 --#define REVISION_WALK_NO_WALK_UNSORTED 2 -- - struct oidset; - struct topo_walk_info; - -@@ -129,7 +125,8 @@ struct rev_info { - /* Traversal flags */ - unsigned int dense:1, - prune:1, -- no_walk:2, -+ no_walk:1, -+ unsorted_input:1, - remove_empty_trees:1, - simplify_history:1, - show_pulls:1, --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0003-connected-do-not-sort-input-revisions.patch b/_support/git-patches/v2.33.1.gl3/0003-connected-do-not-sort-input-revisions.patch deleted file mode 100644 index 6f4f9d477..000000000 --- a/_support/git-patches/v2.33.1.gl3/0003-connected-do-not-sort-input-revisions.patch +++ /dev/null @@ -1,167 +0,0 @@ -From f45022dc2fd692fd024f2eb41a86a66f19013d43 Mon Sep 17 00:00:00 2001 -Message-Id: 
<f45022dc2fd692fd024f2eb41a86a66f19013d43.1630319075.git.ps@pks.im> -In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Mon, 9 Aug 2021 10:11:50 +0200 -Subject: [PATCH 3/6] connected: do not sort input revisions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In order to compute whether objects reachable from a set of tips are all -connected, we do a revision walk with these tips as positive references -and `--not --all`. `--not --all` will cause the revision walk to load -all preexisting references as uninteresting, which can be very expensive -in repositories with many references. - -Benchmarking the git-rev-list(1) command highlights that by far the most -expensive single phase is initial sorting of the input revisions: after -all references have been loaded, we first sort commits by author date. -In a real-world repository with about 2.2 million references, it makes -up about 40% of the total runtime of git-rev-list(1). - -Ultimately, the connectivity check shouldn't really bother about the -order of input revisions at all. We only care whether we can actually -walk all objects until we hit the cut-off point. So sorting the input is -a complete waste of time. - -Introduce a new "--unsorted-input" flag to git-rev-list(1) which will -cause it to not sort the commits and adjust the connectivity check to -always pass the flag. 
This results in the following speedups, executed -in a clone of gitlab-org/gitlab [1]: - - Benchmark #1: git rev-list --objects --quiet --not --all --not $(cat newrev) - Time (mean ± σ): 7.639 s ± 0.065 s [User: 7.304 s, System: 0.335 s] - Range (min … max): 7.543 s … 7.742 s 10 runs - - Benchmark #2: git rev-list --unsorted-input --objects --quiet --not --all --not $newrev - Time (mean ± σ): 4.995 s ± 0.044 s [User: 4.657 s, System: 0.337 s] - Range (min … max): 4.909 s … 5.048 s 10 runs - - Summary - 'git rev-list --unsorted-input --objects --quiet --not --all --not $(cat newrev)' ran - 1.53 ± 0.02 times faster than 'git rev-list --objects --quiet --not --all --not $newrev' - -[1]: https://gitlab.com/gitlab-org/gitlab.git. Note that not all refs - are visible to clients. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - Documentation/rev-list-options.txt | 8 +++++++- - connected.c | 1 + - revision.c | 9 +++++++++ - t/t6000-rev-list-misc.sh | 31 ++++++++++++++++++++++++++++++ - 4 files changed, 48 insertions(+), 1 deletion(-) - -diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt -index 24569b06d1..b7bd27e171 100644 ---- a/Documentation/rev-list-options.txt -+++ b/Documentation/rev-list-options.txt -@@ -968,6 +968,11 @@ list of the missing objects. Object IDs are prefixed with a ``?'' character. - objects. - endif::git-rev-list[] - -+--unsorted-input:: -+ Show commits in the order they were given on the command line instead -+ of sorting them in reverse chronological order by commit time. Cannot -+ be combined with `--no-walk` or `--no-walk=sorted`. -+ - --no-walk[=(sorted|unsorted)]:: - Only show the given commits, but do not traverse their ancestors. - This has no effect if a range is specified. If the argument -@@ -975,7 +980,8 @@ endif::git-rev-list[] - given on the command line. 
Otherwise (if `sorted` or no argument - was given), the commits are shown in reverse chronological order - by commit time. -- Cannot be combined with `--graph`. -+ Cannot be combined with `--graph`. Cannot be combined with -+ `--unsorted-input` if `sorted` or no argument was given. - - --do-walk:: - Overrides a previous `--no-walk`. -diff --git a/connected.c b/connected.c -index b18299fdf0..b5f9523a5f 100644 ---- a/connected.c -+++ b/connected.c -@@ -106,6 +106,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, - if (opt->progress) - strvec_pushf(&rev_list.args, "--progress=%s", - _("Checking connectivity")); -+ strvec_push(&rev_list.args, "--unsorted-input"); - - rev_list.git_cmd = 1; - rev_list.env = opt->env; -diff --git a/revision.c b/revision.c -index 86bbcd10d2..47541407d2 100644 ---- a/revision.c -+++ b/revision.c -@@ -2256,6 +2256,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg - } else if (!strcmp(arg, "--author-date-order")) { - revs->sort_order = REV_SORT_BY_AUTHOR_DATE; - revs->topo_order = 1; -+ } else if (!strcmp(arg, "--unsorted-input")) { -+ if (revs->no_walk) -+ die(_("--unsorted-input is incompatible with --no-walk")); -+ revs->unsorted_input = 1; - } else if (!strcmp(arg, "--early-output")) { - revs->early_output = 100; - revs->topo_order = 1; -@@ -2651,8 +2655,13 @@ static int handle_revision_pseudo_opt(const char *submodule, - } else if (!strcmp(arg, "--not")) { - *flags ^= UNINTERESTING | BOTTOM; - } else if (!strcmp(arg, "--no-walk")) { -+ if (!revs->no_walk && revs->unsorted_input) -+ die(_("--no-walk is incompatible with --unsorted-input")); - revs->no_walk = 1; - } else if (skip_prefix(arg, "--no-walk=", &optarg)) { -+ if (!revs->no_walk && revs->unsorted_input) -+ die(_("--no-walk is incompatible with --unsorted-input")); -+ - /* - * Detached form ("--no-walk X" as opposed to "--no-walk=X") - * not allowed, since the argument is optional. 
-diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh -index 12def7bcbf..ef849e5bc8 100755 ---- a/t/t6000-rev-list-misc.sh -+++ b/t/t6000-rev-list-misc.sh -@@ -169,4 +169,35 @@ test_expect_success 'rev-list --count --objects' ' - test_line_count = $count actual - ' - -+test_expect_success 'rev-list --unsorted-input results in different sorting' ' -+ git rev-list --unsorted-input HEAD HEAD~ >first && -+ git rev-list --unsorted-input HEAD~ HEAD >second && -+ ! test_cmp first second && -+ sort first >first.sorted && -+ sort second >second.sorted && -+ test_cmp first.sorted second.sorted -+' -+ -+test_expect_success 'rev-list --unsorted-input incompatible with --no-walk' ' -+ cat >expect <<-EOF && -+ fatal: --no-walk is incompatible with --unsorted-input -+ EOF -+ test_must_fail git rev-list --unsorted-input --no-walk HEAD 2>error && -+ test_cmp expect error && -+ test_must_fail git rev-list --unsorted-input --no-walk=sorted HEAD 2>error && -+ test_cmp expect error && -+ test_must_fail git rev-list --unsorted-input --no-walk=unsorted HEAD 2>error && -+ test_cmp expect error && -+ -+ cat >expect <<-EOF && -+ fatal: --unsorted-input is incompatible with --no-walk -+ EOF -+ test_must_fail git rev-list --no-walk --unsorted-input HEAD 2>error && -+ test_cmp expect error && -+ test_must_fail git rev-list --no-walk=sorted --unsorted-input HEAD 2>error && -+ test_cmp expect error && -+ test_must_fail git rev-list --no-walk=unsorted --unsorted-input HEAD 2>error && -+ test_cmp expect error -+' -+ - test_done --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0004-revision-stop-retrieving-reference-twice.patch b/_support/git-patches/v2.33.1.gl3/0004-revision-stop-retrieving-reference-twice.patch deleted file mode 100644 index aa00f75f1..000000000 --- a/_support/git-patches/v2.33.1.gl3/0004-revision-stop-retrieving-reference-twice.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bf9c0cbddbcd730e4312ba5e19f8b8a2edd65bb3 Mon Sep 17 00:00:00 2001 -Message-Id: 
<bf9c0cbddbcd730e4312ba5e19f8b8a2edd65bb3.1630319075.git.ps@pks.im> -In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Mon, 9 Aug 2021 10:11:54 +0200 -Subject: [PATCH 4/6] revision: stop retrieving reference twice -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When queueing up references for the revision walk, `handle_one_ref()` -will resolve the reference's object ID via `get_reference()` and then -queue the ID as pending object via `add_pending_oid()`. But given that -`add_pending_oid()` is only a thin wrapper around `add_pending_object()` -which fist calls `get_reference()`, we effectively resolve the reference -twice and thus duplicate some of the work. - -Fix the issue by instead calling `add_pending_object()` directly, which -takes the already-resolved object as input. In a repository with lots of -refs, this translates into a near 10% speedup: - - Benchmark #1: HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev - Time (mean ± σ): 5.015 s ± 0.038 s [User: 4.698 s, System: 0.316 s] - Range (min … max): 4.970 s … 5.089 s 10 runs - - Benchmark #2: HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev - Time (mean ± σ): 4.606 s ± 0.029 s [User: 4.260 s, System: 0.345 s] - Range (min … max): 4.565 s … 4.657 s 10 runs - - Summary - 'HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' ran - 1.09 ± 0.01 times faster than 'HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - revision.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/revision.c b/revision.c -index 47541407d2..80a59896b9 100644 ---- a/revision.c -+++ b/revision.c -@@ -1534,7 
+1534,7 @@ static int handle_one_ref(const char *path, const struct object_id *oid, - - object = get_reference(cb->all_revs, path, oid, cb->all_flags); - add_rev_cmdline(cb->all_revs, object, path, REV_CMD_REF, cb->all_flags); -- add_pending_oid(cb->all_revs, path, oid, cb->all_flags); -+ add_pending_object(cb->all_revs, object, path); - return 0; - } - --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0005-commit-graph-split-out-function-to-search-commit-pos.patch b/_support/git-patches/v2.33.1.gl3/0005-commit-graph-split-out-function-to-search-commit-pos.patch deleted file mode 100644 index 0964f5b98..000000000 --- a/_support/git-patches/v2.33.1.gl3/0005-commit-graph-split-out-function-to-search-commit-pos.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 809ea28f809e52d3204b597637b2f5e072c140f8 Mon Sep 17 00:00:00 2001 -Message-Id: <809ea28f809e52d3204b597637b2f5e072c140f8.1630319075.git.ps@pks.im> -In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Mon, 9 Aug 2021 10:11:59 +0200 -Subject: [PATCH 5/6] commit-graph: split out function to search commit - position - -The function `find_commit_in_graph()` assumes that the caller has passed -an object which was already determined to be a commit given that it will -access the commit's graph position, which is stored in a commit slab. In -a subsequent patch, we want to search for an object ID though without -knowing whether it is a commit or not, which is not currently possible. - -Split out the logic to search the commit graph for a given object ID to -prepare for this change. This commit also renames the function to -`find_commit_pos_in_graph()`, which more accurately reflects what this -function does. 
Furthermore, in order to allow for the searched object ID -to be const, we need to adjust `bsearch_graph()`'s signature to accept a -constant object ID as input, too. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - commit-graph.c | 55 +++++++++++++++++++++++++++----------------------- - 1 file changed, 30 insertions(+), 25 deletions(-) - -diff --git a/commit-graph.c b/commit-graph.c -index 3860a0d847..8c4c7262c8 100644 ---- a/commit-graph.c -+++ b/commit-graph.c -@@ -723,7 +723,7 @@ void close_commit_graph(struct raw_object_store *o) - o->commit_graph = NULL; - } - --static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) -+static int bsearch_graph(struct commit_graph *g, const struct object_id *oid, uint32_t *pos) - { - return bsearch_hash(oid->hash, g->chunk_oid_fanout, - g->chunk_oid_lookup, g->hash_len, pos); -@@ -864,25 +864,30 @@ static int fill_commit_in_graph(struct repository *r, - return 1; - } - --static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) -+static int search_commit_pos_in_graph(const struct object_id *id, struct commit_graph *g, uint32_t *pos) -+{ -+ struct commit_graph *cur_g = g; -+ uint32_t lex_index; -+ -+ while (cur_g && !bsearch_graph(cur_g, id, &lex_index)) -+ cur_g = cur_g->base_graph; -+ -+ if (cur_g) { -+ *pos = lex_index + cur_g->num_commits_in_base; -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) - { - uint32_t graph_pos = commit_graph_position(item); - if (graph_pos != COMMIT_NOT_FROM_GRAPH) { - *pos = graph_pos; - return 1; - } else { -- struct commit_graph *cur_g = g; -- uint32_t lex_index; -- -- while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index)) -- cur_g = cur_g->base_graph; -- -- if (cur_g) { -- *pos = lex_index + cur_g->num_commits_in_base; -- return 1; -- } -- -- return 0; -+ return 
search_commit_pos_in_graph(&item->object.oid, g, pos); - } - } - -@@ -895,7 +900,7 @@ static int parse_commit_in_graph_one(struct repository *r, - if (item->object.parsed) - return 1; - -- if (find_commit_in_graph(item, g, &pos)) -+ if (find_commit_pos_in_graph(item, g, &pos)) - return fill_commit_in_graph(r, item, g, pos); - - return 0; -@@ -921,7 +926,7 @@ void load_commit_graph_info(struct repository *r, struct commit *item) - uint32_t pos; - if (!prepare_commit_graph(r)) - return; -- if (find_commit_in_graph(item, r->objects->commit_graph, &pos)) -+ if (find_commit_pos_in_graph(item, r->objects->commit_graph, &pos)) - fill_commit_graph_info(item, r->objects->commit_graph, pos); - } - -@@ -1091,9 +1096,9 @@ static int write_graph_chunk_data(struct hashfile *f, - edge_value += ctx->new_num_commits_in_base; - else if (ctx->new_base_graph) { - uint32_t pos; -- if (find_commit_in_graph(parent->item, -- ctx->new_base_graph, -- &pos)) -+ if (find_commit_pos_in_graph(parent->item, -+ ctx->new_base_graph, -+ &pos)) - edge_value = pos; - } - -@@ -1122,9 +1127,9 @@ static int write_graph_chunk_data(struct hashfile *f, - edge_value += ctx->new_num_commits_in_base; - else if (ctx->new_base_graph) { - uint32_t pos; -- if (find_commit_in_graph(parent->item, -- ctx->new_base_graph, -- &pos)) -+ if (find_commit_pos_in_graph(parent->item, -+ ctx->new_base_graph, -+ &pos)) - edge_value = pos; - } - -@@ -1235,9 +1240,9 @@ static int write_graph_chunk_extra_edges(struct hashfile *f, - edge_value += ctx->new_num_commits_in_base; - else if (ctx->new_base_graph) { - uint32_t pos; -- if (find_commit_in_graph(parent->item, -- ctx->new_base_graph, -- &pos)) -+ if (find_commit_pos_in_graph(parent->item, -+ ctx->new_base_graph, -+ &pos)) - edge_value = pos; - } - --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch b/_support/git-patches/v2.33.1.gl3/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch deleted 
file mode 100644 index c7e575811..000000000 --- a/_support/git-patches/v2.33.1.gl3/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch +++ /dev/null @@ -1,138 +0,0 @@ -From f559d6d45e7e58ae1f922213948723de77ea77bd Mon Sep 17 00:00:00 2001 -Message-Id: <f559d6d45e7e58ae1f922213948723de77ea77bd.1630319075.git.ps@pks.im> -In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Mon, 9 Aug 2021 10:12:03 +0200 -Subject: [PATCH 6/6] revision: avoid hitting packfiles when commits are in - commit-graph -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When queueing references in git-rev-list(1), we try to optimize parsing -of commits via the commit-graph. To do so, we first look up the object's -type, and if it is a commit we call `repo_parse_commit()` instead of -`parse_object()`. This is quite inefficient though given that we're -always uncompressing the object header in order to determine the type. -Instead, we can opportunistically search the commit-graph for the object -ID: in case it's found, we know it's a commit and can directly fill in -the commit object without having to uncompress the object header. - -Expose a new function `lookup_commit_in_graph()`, which tries to find a -commit in the commit-graph by ID, and convert `get_reference()` to use -this function. This provides a big performance win in cases where we -load references in a repository with lots of references pointing to -commits. 
The following has been executed in a real-world repository with -about 2.2 million refs: - - Benchmark #1: HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev - Time (mean ± σ): 4.458 s ± 0.044 s [User: 4.115 s, System: 0.342 s] - Range (min … max): 4.409 s … 4.534 s 10 runs - - Benchmark #2: HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev - Time (mean ± σ): 3.089 s ± 0.015 s [User: 2.768 s, System: 0.321 s] - Range (min … max): 3.061 s … 3.105 s 10 runs - - Summary - 'HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' ran - 1.44 ± 0.02 times faster than 'HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - commit-graph.c | 24 ++++++++++++++++++++++++ - commit-graph.h | 8 ++++++++ - revision.c | 18 ++++++++---------- - 3 files changed, 40 insertions(+), 10 deletions(-) - -diff --git a/commit-graph.c b/commit-graph.c -index 8c4c7262c8..00614acd65 100644 ---- a/commit-graph.c -+++ b/commit-graph.c -@@ -891,6 +891,30 @@ static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, - } - } - -+struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id) -+{ -+ struct commit *commit; -+ uint32_t pos; -+ -+ if (!repo->objects->commit_graph) -+ return NULL; -+ if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos)) -+ return NULL; -+ if (!repo_has_object_file(repo, id)) -+ return NULL; -+ -+ commit = lookup_commit(repo, id); -+ if (!commit) -+ return NULL; -+ if (commit->object.parsed) -+ return commit; -+ -+ if (!fill_commit_in_graph(repo, commit, repo->objects->commit_graph, pos)) -+ return NULL; -+ -+ return commit; -+} -+ - static int parse_commit_in_graph_one(struct repository *r, - struct commit_graph *g, - struct commit *item) -diff --git a/commit-graph.h b/commit-graph.h -index 
96c24fb577..04a94e1830 100644 ---- a/commit-graph.h -+++ b/commit-graph.h -@@ -40,6 +40,14 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st); - */ - int parse_commit_in_graph(struct repository *r, struct commit *item); - -+/* -+ * Look up the given commit ID in the commit-graph. This will only return a -+ * commit if the ID exists both in the graph and in the object database such -+ * that we don't return commits whose object has been pruned. Otherwise, this -+ * function returns `NULL`. -+ */ -+struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id); -+ - /* - * It is possible that we loaded commit contents from the commit buffer, - * but we also want to ensure the commit-graph content is correctly -diff --git a/revision.c b/revision.c -index 80a59896b9..0dabb5a0bc 100644 ---- a/revision.c -+++ b/revision.c -@@ -360,20 +360,18 @@ static struct object *get_reference(struct rev_info *revs, const char *name, - unsigned int flags) - { - struct object *object; -+ struct commit *commit; - - /* -- * If the repository has commit graphs, repo_parse_commit() avoids -- * reading the object buffer, so use it whenever possible. -+ * If the repository has commit graphs, we try to opportunistically -+ * look up the object ID in those graphs. Like this, we can avoid -+ * parsing commit data from disk. 
- */ -- if (oid_object_info(revs->repo, oid, NULL) == OBJ_COMMIT) { -- struct commit *c = lookup_commit(revs->repo, oid); -- if (!repo_parse_commit(revs->repo, c)) -- object = (struct object *) c; -- else -- object = NULL; -- } else { -+ commit = lookup_commit_in_graph(revs->repo, oid); -+ if (commit) -+ object = &commit->object; -+ else - object = parse_object(revs->repo, oid); -- } - - if (!object) { - if (revs->ignore_missing) --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0007-fetch-skip-formatting-updated-refs-with-quiet.patch b/_support/git-patches/v2.33.1.gl3/0007-fetch-skip-formatting-updated-refs-with-quiet.patch deleted file mode 100644 index 04c7f382a..000000000 --- a/_support/git-patches/v2.33.1.gl3/0007-fetch-skip-formatting-updated-refs-with-quiet.patch +++ /dev/null @@ -1,107 +0,0 @@ -From f6bb64df82ddd050894ca8a2a0bfbd1997602500 Mon Sep 17 00:00:00 2001 -Message-Id: <f6bb64df82ddd050894ca8a2a0bfbd1997602500.1631166264.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Mon, 30 Aug 2021 12:54:26 +0200 -Subject: [PATCH] fetch: skip formatting updated refs with `--quiet` -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When fetching, Git will by default print a list of all updated refs in a -nicely formatted table. In order to come up with this table, Git needs -to iterate refs twice: first to determine the maximum column width, and -a second time to actually format these changed refs. - -While this table will not be printed in case the user passes `--quiet`, -we still go out of our way and do all these steps. In fact, we even do -more work compared to not passing `--quiet`: without the flag, we will -skip all references in the column width computation which have not been -updated, but if it is set we will now compute widths for all refs. 
- -Fix this issue by completely skipping both preparation of the format and -formatting data for display in case the user passes `--quiet`, improving -performance especially with many refs. The following benchmark shows a -nice speedup for a quiet mirror-fetch in a repository with 2.3M refs: - - Benchmark #1: HEAD~: git-fetch - Time (mean ± σ): 26.929 s ± 0.145 s [User: 24.194 s, System: 4.656 s] - Range (min … max): 26.692 s … 27.068 s 5 runs - - Benchmark #2: HEAD: git-fetch - Time (mean ± σ): 25.189 s ± 0.094 s [User: 22.556 s, System: 4.606 s] - Range (min … max): 25.070 s … 25.314 s 5 runs - - Summary - 'HEAD: git-fetch' ran - 1.07 ± 0.01 times faster than 'HEAD~: git-fetch' - -While at it, this patch also fixes `adjust_refcol_width()` such that it -skips unchanged refs in case the user passed `--quiet`, where verbosity -will be negative. While this function won't be called anymore if so, -this brings the comment in line with actual code. Furthermore, needless -`verbosity >= 0` checks are now removed in `store_updated_refs()`: we -never print to the `note` buffer anymore in case `verbosity < 0`, so we -won't end up in that code block anyway. 
- -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index 25740c13df..334bc7efa6 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -712,7 +712,7 @@ static void adjust_refcol_width(const struct ref *ref) - int max, rlen, llen, len; - - /* uptodate lines are only shown on high verbosity level */ -- if (!verbosity && oideq(&ref->peer_ref->old_oid, &ref->old_oid)) -+ if (verbosity <= 0 && oideq(&ref->peer_ref->old_oid, &ref->old_oid)) - return; - - max = term_columns(); -@@ -748,6 +748,9 @@ static void prepare_format_display(struct ref *ref_map) - struct ref *rm; - const char *format = "full"; - -+ if (verbosity < 0) -+ return; -+ - git_config_get_string_tmp("fetch.output", &format); - if (!strcasecmp(format, "full")) - compact_format = 0; -@@ -827,7 +830,12 @@ static void format_display(struct strbuf *display, char code, - const char *remote, const char *local, - int summary_width) - { -- int width = (summary_width + strlen(summary) - gettext_width(summary)); -+ int width; -+ -+ if (verbosity < 0) -+ return; -+ -+ width = (summary_width + strlen(summary) - gettext_width(summary)); - - strbuf_addf(display, "%c %-*s ", code, width, summary); - if (!compact_format) -@@ -1202,13 +1210,12 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, - "FETCH_HEAD", summary_width); - } - if (note.len) { -- if (verbosity >= 0 && !shown_url) { -+ if (!shown_url) { - fprintf(stderr, _("From %.*s\n"), - url_len, url); - shown_url = 1; - } -- if (verbosity >= 0) -- fprintf(stderr, " %s\n", note.buf); -+ fprintf(stderr, " %s\n", note.buf); - } - } - } --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0008-fetch-speed-up-lookup-of-want-refs-via-commit-graph.patch 
b/_support/git-patches/v2.33.1.gl3/0008-fetch-speed-up-lookup-of-want-refs-via-commit-graph.patch deleted file mode 100644 index 2b8505d6e..000000000 --- a/_support/git-patches/v2.33.1.gl3/0008-fetch-speed-up-lookup-of-want-refs-via-commit-graph.patch +++ /dev/null @@ -1,95 +0,0 @@ -From fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692 Mon Sep 17 00:00:00 2001 -Message-Id: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:09:41 +0200 -Subject: [PATCH 08/14] fetch: speed up lookup of want refs via commit-graph -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When updating our local refs based on the refs fetched from the remote, -we need to iterate through all requested refs and load their respective -commits such that we can determine whether they need to be appended to -FETCH_HEAD or not. In cases where we're fetching from a remote with -exceedingly many refs, resolving these refs can be quite expensive given -that we repeatedly need to unpack object headers for each of the -referenced objects. - -Speed this up by opportunistically trying to resolve object IDs via the -commit graph. We only do so for any refs which are not in "refs/tags": -more likely than not, these are going to be a commit anyway, and this -lets us avoid having to unpack object headers completely in case the -object is a commit that is part of the commit-graph. 
This significantly -speeds up mirror-fetches in a real-world repository with -2.3M refs: - - Benchmark #1: HEAD~: git-fetch - Time (mean ± σ): 56.482 s ± 0.384 s [User: 53.340 s, System: 5.365 s] - Range (min … max): 56.050 s … 57.045 s 5 runs - - Benchmark #2: HEAD: git-fetch - Time (mean ± σ): 33.727 s ± 0.170 s [User: 30.252 s, System: 5.194 s] - Range (min … max): 33.452 s … 33.871 s 5 runs - - Summary - 'HEAD: git-fetch' ran - 1.67 ± 0.01 times faster than 'HEAD~: git-fetch' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 24 ++++++++++++++++++------ - 1 file changed, 18 insertions(+), 6 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index e064687dbd..91d1301613 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -1074,7 +1074,6 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, - int connectivity_checked, struct ref *ref_map) - { - struct fetch_head fetch_head; -- struct commit *commit; - int url_len, i, rc = 0; - struct strbuf note = STRBUF_INIT, err = STRBUF_INIT; - struct ref_transaction *transaction = NULL; -@@ -1122,6 +1121,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, - want_status <= FETCH_HEAD_IGNORE; - want_status++) { - for (rm = ref_map; rm; rm = rm->next) { -+ struct commit *commit = NULL; - struct ref *ref = NULL; - - if (rm->status == REF_STATUS_REJECT_SHALLOW) { -@@ -1131,11 +1131,23 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, - continue; - } - -- commit = lookup_commit_reference_gently(the_repository, -- &rm->old_oid, -- 1); -- if (!commit) -- rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; -+ /* -+ * References in "refs/tags/" are often going to point -+ * to annotated tags, which are not part of the -+ * commit-graph. 
We thus only try to look up refs in -+ * the graph which are not in that namespace to not -+ * regress performance in repositories with many -+ * annotated tags. -+ */ -+ if (!starts_with(rm->name, "refs/tags/")) -+ commit = lookup_commit_in_graph(the_repository, &rm->old_oid); -+ if (!commit) { -+ commit = lookup_commit_reference_gently(the_repository, -+ &rm->old_oid, -+ 1); -+ if (!commit) -+ rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; -+ } - - if (rm->fetch_head_status != want_status) - continue; --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0009-fetch-avoid-unpacking-headers-in-object-existence-ch.patch b/_support/git-patches/v2.33.1.gl3/0009-fetch-avoid-unpacking-headers-in-object-existence-ch.patch deleted file mode 100644 index 2d35aaebd..000000000 --- a/_support/git-patches/v2.33.1.gl3/0009-fetch-avoid-unpacking-headers-in-object-existence-ch.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 47c61004c7cfbb8662b13fac813b45e3fd214665 Mon Sep 17 00:00:00 2001 -Message-Id: <47c61004c7cfbb8662b13fac813b45e3fd214665.1631166322.git.ps@pks.im> -In-Reply-To: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:09:45 +0200 -Subject: [PATCH 09/14] fetch: avoid unpacking headers in object existence - check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When updating local refs after the fetch has transferred all objects, we -do an object existence test as a safety guard to avoid updating a ref to -an object which we don't have. We do so via `oid_object_info()`: if it -returns an error, then we know the object does not exist. - -One side effect of `oid_object_info()` is that it parses the object's -type, and to do so it must unpack the object header. 
This is completely -pointless: we don't care for the type, but only want to assert that the -object exists. - -Refactor the code to use `repo_has_object_file()`, which both makes the -code's intent clearer and is also faster because it does not unpack -object headers. In a real-world repo with 2.3M refs, this results in a -small speedup when doing a mirror-fetch: - - Benchmark #1: HEAD~: git-fetch - Time (mean ± σ): 33.686 s ± 0.176 s [User: 30.119 s, System: 5.262 s] - Range (min … max): 33.512 s … 33.944 s 5 runs - - Benchmark #2: HEAD: git-fetch - Time (mean ± σ): 31.247 s ± 0.195 s [User: 28.135 s, System: 5.066 s] - Range (min … max): 30.948 s … 31.472 s 5 runs - - Summary - 'HEAD: git-fetch' ran - 1.08 ± 0.01 times faster than 'HEAD~: git-fetch' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index 91d1301613..01513e6aea 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -846,13 +846,11 @@ static int update_local_ref(struct ref *ref, - int summary_width) - { - struct commit *current = NULL, *updated; -- enum object_type type; - struct branch *current_branch = branch_get(NULL); - const char *pretty_ref = prettify_refname(ref->name); - int fast_forward = 0; - -- type = oid_object_info(the_repository, &ref->new_oid, NULL); -- if (type < 0) -+ if (!repo_has_object_file(the_repository, &ref->new_oid)) - die(_("object %s not found"), oid_to_hex(&ref->new_oid)); - - if (oideq(&ref->old_oid, &ref->new_oid)) { --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0010-connected-refactor-iterator-to-return-next-object-ID.patch b/_support/git-patches/v2.33.1.gl3/0010-connected-refactor-iterator-to-return-next-object-ID.patch deleted file mode 100644 index 872b92784..000000000 --- a/_support/git-patches/v2.33.1.gl3/0010-connected-refactor-iterator-to-return-next-object-ID.patch 
+++ /dev/null @@ -1,260 +0,0 @@ -From 9fec7b213045135655354e864d15894175428d5a Mon Sep 17 00:00:00 2001 -Message-Id: <9fec7b213045135655354e864d15894175428d5a.1631166322.git.ps@pks.im> -In-Reply-To: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:09:50 +0200 -Subject: [PATCH 10/14] connected: refactor iterator to return next object ID - directly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The object ID iterator used by the connectivity checks returns the next -object ID via an out-parameter and then uses a return code to indicate -whether an item was found. This is a bit roundabout: instead of a -separate error code, we can just return the next object ID directly and -use `NULL` pointers as indicator that the iterator got no items left. -Furthermore, this avoids a copy of the object ID. - -Refactor the iterator and all its implementations to return object IDs -directly. This brings a tiny performance improvement when doing a mirror-fetch of a repository with about 2.3M refs: - - Benchmark #1: 328dc58b49919c43897240f2eabfa30be2ce32a4~: git-fetch - Time (mean ± σ): 30.110 s ± 0.148 s [User: 27.161 s, System: 5.075 s] - Range (min … max): 29.934 s … 30.406 s 10 runs - - Benchmark #2: 328dc58b49919c43897240f2eabfa30be2ce32a4: git-fetch - Time (mean ± σ): 29.899 s ± 0.109 s [User: 26.916 s, System: 5.104 s] - Range (min … max): 29.696 s … 29.996 s 10 runs - - Summary - '328dc58b49919c43897240f2eabfa30be2ce32a4: git-fetch' ran - 1.01 ± 0.01 times faster than '328dc58b49919c43897240f2eabfa30be2ce32a4~: git-fetch' - -While this 1% speedup could be labelled as statistically insignificant, -the speedup is consistent on my machine. 
Furthermore, this is an end to -end test, so it is expected that the improvement in the connectivity -check itself is more significant. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/clone.c | 8 +++----- - builtin/fetch.c | 7 +++---- - builtin/receive-pack.c | 17 +++++++---------- - connected.c | 15 ++++++++------- - connected.h | 2 +- - fetch-pack.c | 7 +++---- - 6 files changed, 25 insertions(+), 31 deletions(-) - -diff --git a/builtin/clone.c b/builtin/clone.c -index 66fe66679c..4a1056fcc2 100644 ---- a/builtin/clone.c -+++ b/builtin/clone.c -@@ -657,7 +657,7 @@ static void write_followtags(const struct ref *refs, const char *msg) - } - } - --static int iterate_ref_map(void *cb_data, struct object_id *oid) -+static const struct object_id *iterate_ref_map(void *cb_data) - { - struct ref **rm = cb_data; - struct ref *ref = *rm; -@@ -668,13 +668,11 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid) - */ - while (ref && !ref->peer_ref) - ref = ref->next; -- /* Returning -1 notes "end of list" to the caller. 
*/ - if (!ref) -- return -1; -+ return NULL; - -- oidcpy(oid, &ref->old_oid); - *rm = ref->next; -- return 0; -+ return &ref->old_oid; - } - - static void update_remote_refs(const struct ref *refs, -diff --git a/builtin/fetch.c b/builtin/fetch.c -index 01513e6aea..cdf0d0d671 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -962,7 +962,7 @@ static int update_local_ref(struct ref *ref, - } - } - --static int iterate_ref_map(void *cb_data, struct object_id *oid) -+static const struct object_id *iterate_ref_map(void *cb_data) - { - struct ref **rm = cb_data; - struct ref *ref = *rm; -@@ -970,10 +970,9 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid) - while (ref && ref->status == REF_STATUS_REJECT_SHALLOW) - ref = ref->next; - if (!ref) -- return -1; /* end of the list */ -+ return NULL; - *rm = ref->next; -- oidcpy(oid, &ref->old_oid); -- return 0; -+ return &ref->old_oid; - } - - struct fetch_head { -diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c -index 2d1f97e1ca..041e915454 100644 ---- a/builtin/receive-pack.c -+++ b/builtin/receive-pack.c -@@ -1306,7 +1306,7 @@ static void refuse_unconfigured_deny_delete_current(void) - rp_error("%s", _(refuse_unconfigured_deny_delete_current_msg)); - } - --static int command_singleton_iterator(void *cb_data, struct object_id *oid); -+static const struct object_id *command_singleton_iterator(void *cb_data); - static int update_shallow_ref(struct command *cmd, struct shallow_info *si) - { - struct shallow_lock shallow_lock = SHALLOW_LOCK_INIT; -@@ -1731,16 +1731,15 @@ static void check_aliased_updates(struct command *commands) - string_list_clear(&ref_list, 0); - } - --static int command_singleton_iterator(void *cb_data, struct object_id *oid) -+static const struct object_id *command_singleton_iterator(void *cb_data) - { - struct command **cmd_list = cb_data; - struct command *cmd = *cmd_list; - - if (!cmd || is_null_oid(&cmd->new_oid)) -- return -1; /* end of list */ -+ return NULL; - 
*cmd_list = NULL; /* this returns only one */ -- oidcpy(oid, &cmd->new_oid); -- return 0; -+ return &cmd->new_oid; - } - - static void set_connectivity_errors(struct command *commands, -@@ -1770,7 +1769,7 @@ struct iterate_data { - struct shallow_info *si; - }; - --static int iterate_receive_command_list(void *cb_data, struct object_id *oid) -+static const struct object_id *iterate_receive_command_list(void *cb_data) - { - struct iterate_data *data = cb_data; - struct command **cmd_list = &data->cmds; -@@ -1781,13 +1780,11 @@ static int iterate_receive_command_list(void *cb_data, struct object_id *oid) - /* to be checked in update_shallow_ref() */ - continue; - if (!is_null_oid(&cmd->new_oid) && !cmd->skip_update) { -- oidcpy(oid, &cmd->new_oid); - *cmd_list = cmd->next; -- return 0; -+ return &cmd->new_oid; - } - } -- *cmd_list = NULL; -- return -1; /* end of list */ -+ return NULL; - } - - static void reject_updates_to_hidden(struct command *commands) -diff --git a/connected.c b/connected.c -index b18299fdf0..35bd4a2638 100644 ---- a/connected.c -+++ b/connected.c -@@ -24,7 +24,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, - struct child_process rev_list = CHILD_PROCESS_INIT; - FILE *rev_list_in; - struct check_connected_options defaults = CHECK_CONNECTED_INIT; -- struct object_id oid; -+ const struct object_id *oid; - int err = 0; - struct packed_git *new_pack = NULL; - struct transport *transport; -@@ -34,7 +34,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data, - opt = &defaults; - transport = opt->transport; - -- if (fn(cb_data, &oid)) { -+ oid = fn(cb_data); -+ if (!oid) { - if (opt->err_fd) - close(opt->err_fd); - return err; -@@ -73,7 +74,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, - for (p = get_all_packs(the_repository); p; p = p->next) { - if (!p->pack_promisor) - continue; -- if (find_pack_entry_one(oid.hash, p)) -+ if (find_pack_entry_one(oid->hash, p)) - goto promisor_pack_found; - } - /* -@@ -83,7 +84,7 @@ int 
check_connected(oid_iterate_fn fn, void *cb_data, - goto no_promisor_pack_found; - promisor_pack_found: - ; -- } while (!fn(cb_data, &oid)); -+ } while ((oid = fn(cb_data)) != NULL); - return 0; - } - -@@ -132,12 +133,12 @@ int check_connected(oid_iterate_fn fn, void *cb_data, - * are sure the ref is good and not sending it to - * rev-list for verification. - */ -- if (new_pack && find_pack_entry_one(oid.hash, new_pack)) -+ if (new_pack && find_pack_entry_one(oid->hash, new_pack)) - continue; - -- if (fprintf(rev_list_in, "%s\n", oid_to_hex(&oid)) < 0) -+ if (fprintf(rev_list_in, "%s\n", oid_to_hex(oid)) < 0) - break; -- } while (!fn(cb_data, &oid)); -+ } while ((oid = fn(cb_data)) != NULL); - - if (ferror(rev_list_in) || fflush(rev_list_in)) { - if (errno != EPIPE && errno != EINVAL) -diff --git a/connected.h b/connected.h -index 8d5a6b3ad6..6e59c92aa3 100644 ---- a/connected.h -+++ b/connected.h -@@ -9,7 +9,7 @@ struct transport; - * When called after returning the name for the last object, return -1 - * to signal EOF, otherwise return 0. - */ --typedef int (*oid_iterate_fn)(void *, struct object_id *oid); -+typedef const struct object_id *(*oid_iterate_fn)(void *); - - /* - * Named-arguments struct for check_connected. 
All arguments are -diff --git a/fetch-pack.c b/fetch-pack.c -index 0bf7ed7e47..e6ec79f81a 100644 ---- a/fetch-pack.c -+++ b/fetch-pack.c -@@ -1912,16 +1912,15 @@ static void update_shallow(struct fetch_pack_args *args, - oid_array_clear(&ref); - } - --static int iterate_ref_map(void *cb_data, struct object_id *oid) -+static const struct object_id *iterate_ref_map(void *cb_data) - { - struct ref **rm = cb_data; - struct ref *ref = *rm; - - if (!ref) -- return -1; /* end of the list */ -+ return NULL; - *rm = ref->next; -- oidcpy(oid, &ref->old_oid); -- return 0; -+ return &ref->old_oid; - } - - struct ref *fetch_pack(struct fetch_pack_args *args, --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0011-fetch-pack-optimize-loading-of-refs-via-commit-graph.patch b/_support/git-patches/v2.33.1.gl3/0011-fetch-pack-optimize-loading-of-refs-via-commit-graph.patch deleted file mode 100644 index f0b9af0c1..000000000 --- a/_support/git-patches/v2.33.1.gl3/0011-fetch-pack-optimize-loading-of-refs-via-commit-graph.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 62b5a35a33ad6a4537e2ae75a49036e4173fcc87 Mon Sep 17 00:00:00 2001 -Message-Id: <62b5a35a33ad6a4537e2ae75a49036e4173fcc87.1631166322.git.ps@pks.im> -In-Reply-To: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:09:54 +0200 -Subject: [PATCH 11/14] fetch-pack: optimize loading of refs via commit graph -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In order to negotiate a packfile, we need to dereference refs to see -which commits we have in common with the remote. To do so, we first look -up the object's type -- if it's a tag, we peel until we hit a non-tag -object. If we hit a commit eventually, then we return that commit. 
- -In case the object ID points to a commit directly, we can avoid the -initial lookup of the object type by opportunistically looking up the -commit via the commit-graph, if available, which gives us a slight speed -bump of about 2% in a huge repository with about 2.3M refs: - - Benchmark #1: HEAD~: git-fetch - Time (mean ± σ): 31.634 s ± 0.258 s [User: 28.400 s, System: 5.090 s] - Range (min … max): 31.280 s … 31.896 s 5 runs - - Benchmark #2: HEAD: git-fetch - Time (mean ± σ): 31.129 s ± 0.543 s [User: 27.976 s, System: 5.056 s] - Range (min … max): 30.172 s … 31.479 s 5 runs - - Summary - 'HEAD: git-fetch' ran - 1.02 ± 0.02 times faster than 'HEAD~: git-fetch' - -In case this fails, we fall back to the old code which peels the -objects to a commit. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - fetch-pack.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/fetch-pack.c b/fetch-pack.c -index e6ec79f81a..a9604f35a3 100644 ---- a/fetch-pack.c -+++ b/fetch-pack.c -@@ -119,6 +119,11 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid, - { - enum object_type type; - struct object_info info = { .typep = &type }; -+ struct commit *commit; -+ -+ commit = lookup_commit_in_graph(the_repository, oid); -+ if (commit) -+ return commit; - - while (1) { - if (oid_object_info_extended(the_repository, oid, &info, --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0012-fetch-refactor-fetch-refs-to-be-more-extendable.patch b/_support/git-patches/v2.33.1.gl3/0012-fetch-refactor-fetch-refs-to-be-more-extendable.patch deleted file mode 100644 index e2ef70836..000000000 --- a/_support/git-patches/v2.33.1.gl3/0012-fetch-refactor-fetch-refs-to-be-more-extendable.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 284b2ce8fcb100e7194b9cca6d9b99bca7da39b6 Mon Sep 17 00:00:00 2001 -Message-Id: <284b2ce8fcb100e7194b9cca6d9b99bca7da39b6.1631166322.git.ps@pks.im> -In-Reply-To: 
<fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:09:58 +0200 -Subject: [PATCH 12/14] fetch: refactor fetch refs to be more extendable - -Refactor `fetch_refs()` code to make it more extendable by explicitly -handling error cases. The refactored code should behave the same. - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 24 +++++++++++++++++------- - 1 file changed, 17 insertions(+), 7 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index cdf0d0d671..ef6f9b3a33 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -1293,18 +1293,28 @@ static int check_exist_and_connected(struct ref *ref_map) - - static int fetch_refs(struct transport *transport, struct ref *ref_map) - { -- int ret = check_exist_and_connected(ref_map); -+ int ret; -+ -+ /* -+ * We don't need to perform a fetch in case we can already satisfy all -+ * refs. -+ */ -+ ret = check_exist_and_connected(ref_map); - if (ret) { - trace2_region_enter("fetch", "fetch_refs", the_repository); - ret = transport_fetch_refs(transport, ref_map); - trace2_region_leave("fetch", "fetch_refs", the_repository); -+ if (ret) -+ goto out; - } -- if (!ret) -- /* -- * Keep the new pack's ".keep" file around to allow the caller -- * time to update refs to reference the new objects. -- */ -- return 0; -+ -+ /* -+ * Keep the new pack's ".keep" file around to allow the caller -+ * time to update refs to reference the new objects. 
-+ */ -+ return ret; -+ -+out: - transport_unlock_pack(transport); - return ret; - } --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0013-fetch-merge-fetching-and-consuming-refs.patch b/_support/git-patches/v2.33.1.gl3/0013-fetch-merge-fetching-and-consuming-refs.patch deleted file mode 100644 index ba792717c..000000000 --- a/_support/git-patches/v2.33.1.gl3/0013-fetch-merge-fetching-and-consuming-refs.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 1c7d1ab6f4a79e44406f304ec01b0a143dae9abb Mon Sep 17 00:00:00 2001 -Message-Id: <1c7d1ab6f4a79e44406f304ec01b0a143dae9abb.1631166322.git.ps@pks.im> -In-Reply-To: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:10:02 +0200 -Subject: [PATCH 13/14] fetch: merge fetching and consuming refs - -The functions `fetch_refs()` and `consume_refs()` must always be called -together such that we first obtain all missing objects and then update -our local refs to match the remote refs. In a subsequent patch, we'll -further require that `fetch_refs()` must always be called before -`consume_refs()` such that it can correctly assert that we have all -objects after the fetch given that we're about to move the connectivity -check. - -Make this requirement explicit by merging both functions into a single -`fetch_and_consume_refs()` function. 
- -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 30 +++++++++--------------------- - 1 file changed, 9 insertions(+), 21 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index ef6f9b3a33..a1e17edd8b 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -1291,8 +1291,9 @@ static int check_exist_and_connected(struct ref *ref_map) - return check_connected(iterate_ref_map, &rm, &opt); - } - --static int fetch_refs(struct transport *transport, struct ref *ref_map) -+static int fetch_and_consume_refs(struct transport *transport, struct ref *ref_map) - { -+ int connectivity_checked; - int ret; - - /* -@@ -1308,30 +1309,18 @@ static int fetch_refs(struct transport *transport, struct ref *ref_map) - goto out; - } - -- /* -- * Keep the new pack's ".keep" file around to allow the caller -- * time to update refs to reference the new objects. -- */ -- return ret; -- --out: -- transport_unlock_pack(transport); -- return ret; --} -- --/* Update local refs based on the ref values fetched from a remote */ --static int consume_refs(struct transport *transport, struct ref *ref_map) --{ -- int connectivity_checked = transport->smart_options -+ connectivity_checked = transport->smart_options - ? 
transport->smart_options->connectivity_checked : 0; -- int ret; -+ - trace2_region_enter("fetch", "consume_refs", the_repository); - ret = store_updated_refs(transport->url, - transport->remote->name, - connectivity_checked, - ref_map); -- transport_unlock_pack(transport); - trace2_region_leave("fetch", "consume_refs", the_repository); -+ -+out: -+ transport_unlock_pack(transport); - return ret; - } - -@@ -1520,8 +1509,7 @@ static void backfill_tags(struct transport *transport, struct ref *ref_map) - transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL); - transport_set_option(transport, TRANS_OPT_DEPTH, "0"); - transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL); -- if (!fetch_refs(transport, ref_map)) -- consume_refs(transport, ref_map); -+ fetch_and_consume_refs(transport, ref_map); - - if (gsecondary) { - transport_disconnect(gsecondary); -@@ -1612,7 +1600,7 @@ static int do_fetch(struct transport *transport, - transport->url); - } - } -- if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) { -+ if (fetch_and_consume_refs(transport, ref_map)) { - free_refs(ref_map); - retcode = 1; - goto cleanup; --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0014-fetch-avoid-second-connectivity-check-if-we-already-.patch b/_support/git-patches/v2.33.1.gl3/0014-fetch-avoid-second-connectivity-check-if-we-already-.patch deleted file mode 100644 index 89a16dfa0..000000000 --- a/_support/git-patches/v2.33.1.gl3/0014-fetch-avoid-second-connectivity-check-if-we-already-.patch +++ /dev/null @@ -1,78 +0,0 @@ -From caff8b73402d4b5edb2c6c755506c5a90351b69a Mon Sep 17 00:00:00 2001 -Message-Id: <caff8b73402d4b5edb2c6c755506c5a90351b69a.1631166322.git.ps@pks.im> -In-Reply-To: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -References: <fe7df03a9a2fa434ebce38b2cd5e6da42f8b2692.1631166322.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Wed, 1 Sep 2021 15:10:06 +0200 -Subject: [PATCH 14/14] fetch: avoid 
second connectivity check if we already - have all objects -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When fetching refs, we are doing two connectivity checks: - - - The first one is done such that we can skip fetching refs in the - case where we already have all objects referenced by the updated - set of refs. - - - The second one verifies that we have all objects after we have - fetched objects. - -We always execute both connectivity checks, but this is wasteful in case -the first connectivity check already notices that we have all objects -locally available. - -Skip the second connectivity check in case we already had all objects -available. This gives us a nice speedup when doing a mirror-fetch in a -repository with about 2.3M refs where the fetching repo already has all -objects: - - Benchmark #1: HEAD~: git-fetch - Time (mean ± σ): 30.025 s ± 0.081 s [User: 27.070 s, System: 4.933 s] - Range (min … max): 29.900 s … 30.111 s 5 runs - - Benchmark #2: HEAD: git-fetch - Time (mean ± σ): 25.574 s ± 0.177 s [User: 22.855 s, System: 4.683 s] - Range (min … max): 25.399 s … 25.765 s 5 runs - - Summary - 'HEAD: git-fetch' ran - 1.17 ± 0.01 times faster than 'HEAD~: git-fetch' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index a1e17edd8b..e2c952ec67 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -1293,7 +1293,7 @@ static int check_exist_and_connected(struct ref *ref_map) - - static int fetch_and_consume_refs(struct transport *transport, struct ref *ref_map) - { -- int connectivity_checked; -+ int connectivity_checked = 1; - int ret; - - /* -@@ -1307,11 +1307,10 @@ static int fetch_and_consume_refs(struct transport *transport, struct ref *ref_m - trace2_region_leave("fetch", "fetch_refs", the_repository); - if (ret) - 
goto out; -+ connectivity_checked = transport->smart_options ? -+ transport->smart_options->connectivity_checked : 0; - } - -- connectivity_checked = transport->smart_options -- ? transport->smart_options->connectivity_checked : 0; -- - trace2_region_enter("fetch", "consume_refs", the_repository); - ret = store_updated_refs(transport->url, - transport->remote->name, --- -2.33.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0016-pkt-line-add-stdio-packet-write-functions.patch b/_support/git-patches/v2.33.1.gl3/0016-pkt-line-add-stdio-packet-write-functions.patch deleted file mode 100644 index ea4e221f3..000000000 --- a/_support/git-patches/v2.33.1.gl3/0016-pkt-line-add-stdio-packet-write-functions.patch +++ /dev/null @@ -1,126 +0,0 @@ -From c7e2dd1a100170e1dbd204be68d54c0e230113df Mon Sep 17 00:00:00 2001 -From: Jacob Vosmaer <jacob@gitlab.com> -Date: Wed, 1 Sep 2021 14:54:41 +0200 -Subject: [PATCH 16/17] pkt-line: add stdio packet write functions - -This adds three new functions to pkt-line.c: packet_fwrite, -packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush -packet, packet_fflush also flushes the stdio buffer of the stream. 
- -Helped-by: Patrick Steinhardt <ps@pks.im> -Helped-by: Jeff King <peff@peff.net> -Signed-off-by: Jacob Vosmaer <jacob@gitlab.com> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - cache.h | 2 ++ - pkt-line.c | 37 +++++++++++++++++++++++++++++++++++++ - pkt-line.h | 11 +++++++++++ - write-or-die.c | 12 ++++++++++++ - 4 files changed, 62 insertions(+) - -diff --git a/cache.h b/cache.h -index bd4869beee..dcf2454c3b 100644 ---- a/cache.h -+++ b/cache.h -@@ -1736,6 +1736,8 @@ extern const char *git_mailmap_blob; - void maybe_flush_or_die(FILE *, const char *); - __attribute__((format (printf, 2, 3))) - void fprintf_or_die(FILE *, const char *fmt, ...); -+void fwrite_or_die(FILE *f, const void *buf, size_t count); -+void fflush_or_die(FILE *f); - - #define COPY_READ_ERROR (-2) - #define COPY_WRITE_ERROR (-3) -diff --git a/pkt-line.c b/pkt-line.c -index 9f63eae2e6..de4a94b437 100644 ---- a/pkt-line.c -+++ b/pkt-line.c -@@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size) - die("%s", err.buf); - } - -+void packet_fwrite(FILE *f, const char *buf, size_t size) -+{ -+ size_t packet_size; -+ char header[4]; -+ -+ if (size > LARGE_PACKET_DATA_MAX) -+ die(_("packet write failed - data exceeds max packet size")); -+ -+ packet_trace(buf, size, 1); -+ packet_size = size + 4; -+ -+ set_packet_header(header, packet_size); -+ fwrite_or_die(f, header, 4); -+ fwrite_or_die(f, buf, size); -+} -+ -+void packet_fwrite_fmt(FILE *fh, const char *fmt, ...) -+{ -+ static struct strbuf buf = STRBUF_INIT; -+ va_list args; -+ -+ strbuf_reset(&buf); -+ -+ va_start(args, fmt); -+ format_packet(&buf, "", fmt, args); -+ va_end(args); -+ -+ fwrite_or_die(fh, buf.buf, buf.len); -+} -+ -+void packet_fflush(FILE *f) -+{ -+ packet_trace("0000", 4, 1); -+ fwrite_or_die(f, "0000", 4); -+ fflush_or_die(f); -+} -+ - void packet_buf_write(struct strbuf *buf, const char *fmt, ...) 
- { - va_list args; -diff --git a/pkt-line.h b/pkt-line.h -index 5af5f45687..82b95e4bdd 100644 ---- a/pkt-line.h -+++ b/pkt-line.h -@@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format - int write_packetized_from_fd_no_flush(int fd_in, int fd_out); - int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out); - -+/* -+ * Stdio versions of packet_write functions. When mixing these with fd -+ * based functions, take care to call fflush(3) before doing fd writes or -+ * closing the fd. -+ */ -+void packet_fwrite(FILE *f, const char *buf, size_t size); -+void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3))); -+ -+/* packet_fflush writes a flush packet and flushes the stdio buffer of f */ -+void packet_fflush(FILE *f); -+ - /* - * Read a packetized line into the buffer, which must be at least size bytes - * long. The return value specifies the number of bytes read into the buffer. -diff --git a/write-or-die.c b/write-or-die.c -index d33e68f6ab..0b1ec8190b 100644 ---- a/write-or-die.c -+++ b/write-or-die.c -@@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count) - die_errno("write error"); - } - } -+ -+void fwrite_or_die(FILE *f, const void *buf, size_t count) -+{ -+ if (fwrite(buf, 1, count, f) != count) -+ die_errno("fwrite error"); -+} -+ -+void fflush_or_die(FILE *f) -+{ -+ if (fflush(f)) -+ die_errno("fflush error"); -+} --- -2.32.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch b/_support/git-patches/v2.33.1.gl3/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch deleted file mode 100644 index 83db05f80..000000000 --- a/_support/git-patches/v2.33.1.gl3/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 66949334dbd9e0d36b0fa8e50ef120aa88e7c094 Mon Sep 17 00:00:00 2001 -From: Jacob Vosmaer <jacob@gitlab.com> -Date: Wed, 1 Sep 2021 14:54:42 
+0200 -Subject: [PATCH 17/17] upload-pack: use stdio in send_ref callbacks - -In both protocol v0 and v2, upload-pack writes one pktline packet per -advertised ref to stdout. That means one or two write(2) syscalls per -ref. This is problematic if these writes become network sends with -high overhead. - -This commit changes both send_ref callbacks to use buffered IO using -stdio. - -To give an example of the impact: I set up a single-threaded loop that -calls ls-remote (with HTTP and protocol v2) on a local GitLab -instance, on a repository with 11K refs. When I switch from Git -v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and -65% for Gitaly (GitLab's Git RPC service). - -So using buffered IO not only saves syscalls in upload-pack, it also -saves time in things that consume upload-pack's output. - -Helped-by: Jeff King <peff@peff.net> -Signed-off-by: Jacob Vosmaer <jacob@gitlab.com> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - ls-refs.c | 4 ++-- - upload-pack.c | 11 ++++++++--- - 2 files changed, 10 insertions(+), 5 deletions(-) - -diff --git a/ls-refs.c b/ls-refs.c -index 88f6c3f60d..e6a2dbd962 100644 ---- a/ls-refs.c -+++ b/ls-refs.c -@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid, - } - - strbuf_addch(&refline, '\n'); -- packet_write(1, refline.buf, refline.len); -+ packet_fwrite(stdout, refline.buf, refline.len); - - strbuf_release(&refline); - return 0; -@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys, - strvec_push(&data.prefixes, ""); - for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v, - send_ref, &data, 0); -- packet_flush(1); -+ packet_fflush(stdout); - strvec_clear(&data.prefixes); - return 0; - } -diff --git a/upload-pack.c b/upload-pack.c -index 297b76fcb4..0ed377b1fb 100644 ---- a/upload-pack.c -+++ b/upload-pack.c -@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid, - - 
format_symref_info(&symref_info, &data->symref); - format_session_id(&session_id, data); -- packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", -+ packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", - oid_to_hex(oid), refname_nons, - 0, capabilities, - (data->allow_uor & ALLOW_TIP_SHA1) ? -@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid, - strbuf_release(&symref_info); - strbuf_release(&session_id); - } else { -- packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons); -+ packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons); - } - capabilities = NULL; - if (!peel_iterated_oid(oid, &peeled)) -- packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); -+ packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); - return 0; - } - -@@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options) - reset_timeout(data.timeout); - head_ref_namespaced(send_ref, &data); - for_each_namespaced_ref(send_ref, &data); -+ /* -+ * fflush stdout before calling advertise_shallow_grafts because send_ref -+ * uses stdio. -+ */ -+ fflush_or_die(stdout); - advertise_shallow_grafts(1); - packet_flush(1); - } else { --- -2.32.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0018-upload-pack.c-increase-output-buffer-size.patch b/_support/git-patches/v2.33.1.gl3/0018-upload-pack.c-increase-output-buffer-size.patch deleted file mode 100644 index 6adb09deb..000000000 --- a/_support/git-patches/v2.33.1.gl3/0018-upload-pack.c-increase-output-buffer-size.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 8dee71501ba8dca0872fe3166a0a817c41e14d6e Mon Sep 17 00:00:00 2001 -From: Jacob Vosmaer <jacob@gitlab.com> -Date: Tue, 14 Dec 2021 20:46:26 +0100 -Subject: [PATCH] upload-pack.c: increase output buffer size - -When serving a fetch, git upload-pack copies data from a git -pack-objects stdout pipe to its stdout. 
This commit increases the size -of the buffer used for that copying from 8192 to 65515, the maximum -sideband-64k packet size. - -Previously, this buffer was allocated on the stack. Because the new -buffer size is nearly 64KB, we switch this to a heap allocation. - -On GitLab.com we use GitLab's pack-objects cache which does writes of -65515 bytes. Because of the default 8KB buffer size, propagating these -cache writes requires 8 pipe reads and 8 pipe writes from -git-upload-pack, and 8 pipe reads from Gitaly (our Git RPC service). -If we increase the size of the buffer to the maximum Git packet size, -we need only 1 pipe read and 1 pipe write in git-upload-pack, and 1 -pipe read in Gitaly to transfer the same amount of data. In benchmarks -with a pure fetch and 100% cache hit rate workload we are seeing CPU -utilization reductions of over 30%. - -Signed-off-by: Jacob Vosmaer <jacob@gitlab.com> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - upload-pack.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/upload-pack.c b/upload-pack.c -index 6ce07231d3..c52f7709d4 100644 ---- a/upload-pack.c -+++ b/upload-pack.c -@@ -194,7 +194,13 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) - } - - struct output_state { -- char buffer[8193]; -+ /* -+ * We do writes no bigger than LARGE_PACKET_DATA_MAX - 1, because with -+ * sideband-64k the band designator takes up 1 byte of space. Because -+ * relay_pack_data keeps the last byte to itself, we make the buffer 1 -+ * byte bigger than the intended maximum write size. 
-+ */ -+ char buffer[(LARGE_PACKET_DATA_MAX - 1) + 1]; - int used; - unsigned packfile_uris_started : 1; - unsigned packfile_started : 1; -@@ -269,7 +275,7 @@ static void create_pack_file(struct upload_pack_data *pack_data, - const struct string_list *uri_protocols) - { - struct child_process pack_objects = CHILD_PROCESS_INIT; -- struct output_state output_state = { { 0 } }; -+ struct output_state *output_state = xcalloc(1, sizeof(struct output_state)); - char progress[128]; - char abort_msg[] = "aborting due to possible repository " - "corruption on the remote side."; -@@ -404,7 +410,7 @@ static void create_pack_file(struct upload_pack_data *pack_data, - } - if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) { - int result = relay_pack_data(pack_objects.out, -- &output_state, -+ output_state, - pack_data->use_sideband, - !!uri_protocols); - -@@ -438,11 +444,12 @@ static void create_pack_file(struct upload_pack_data *pack_data, - } - - /* flush the data */ -- if (output_state.used > 0) { -- send_client_data(1, output_state.buffer, output_state.used, -+ if (output_state->used > 0) { -+ send_client_data(1, output_state->buffer, output_state->used, - pack_data->use_sideband); - fprintf(stderr, "flushed.\n"); - } -+ free(output_state); - if (pack_data->use_sideband) - packet_flush(1); - return; --- -2.32.0 - diff --git a/_support/git-patches/v2.33.1.gl3/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch b/_support/git-patches/v2.33.1.gl3/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch deleted file mode 100644 index 855bcc193..000000000 --- a/_support/git-patches/v2.33.1.gl3/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 6fd1cc8f985ccd8b014e945a819482b267dae21f Mon Sep 17 00:00:00 2001 -Message-Id: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Thu, 10 Feb 2022 13:28:09 +0100 -Subject: [PATCH 1/2] fetch-pack: use commit-graph when 
computing cutoff -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -During packfile negotiation we iterate over all refs announced by the -remote side to check whether their IDs refer to commits already known to -us. If a commit is known to us already, then its date is a potential -cutoff point for commits we have in common with the remote side. - -There is potentially a lot of commits announced by the remote depending -on how many refs there are in the remote repository, and for every one -of them we need to search for it in our object database and, if found, -parse the corresponding object to find out whether it is a candidate for -the cutoff date. This can be sped up by trying to look up commits via -the commit-graph first, which is a lot more efficient. - -Benchmarks in a repository with about 2,1 million refs and an up-to-date -commit-graph show an almost 20% speedup when mirror-fetching: - - Benchmark 1: git fetch +refs/*:refs/* (v2.35.0) - Time (mean ± σ): 115.587 s ± 2.009 s [User: 109.874 s, System: 11.305 s] - Range (min … max): 113.584 s … 118.820 s 5 runs - - Benchmark 2: git fetch +refs/*:refs/* (HEAD) - Time (mean ± σ): 96.859 s ± 0.624 s [User: 91.948 s, System: 10.980 s] - Range (min … max): 96.180 s … 97.875 s 5 runs - - Summary - 'git fetch +refs/*:refs/* (HEAD)' ran - 1.19 ± 0.02 times faster than 'git fetch +refs/*:refs/* (v2.35.0)' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - fetch-pack.c | 28 ++++++++++++++++------------ - 1 file changed, 16 insertions(+), 12 deletions(-) - -diff --git a/fetch-pack.c b/fetch-pack.c -index dd6ec449f2..c5967e228e 100644 ---- a/fetch-pack.c -+++ b/fetch-pack.c -@@ -696,26 +696,30 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, - - trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); - for (ref = *refs; ref; ref = ref->next) { -- struct object *o; -+ 
struct commit *commit; - -- if (!has_object_file_with_flags(&ref->old_oid, -+ commit = lookup_commit_in_graph(the_repository, &ref->old_oid); -+ if (!commit) { -+ struct object *o; -+ -+ if (!has_object_file_with_flags(&ref->old_oid, - OBJECT_INFO_QUICK | -- OBJECT_INFO_SKIP_FETCH_OBJECT)) -- continue; -- o = parse_object(the_repository, &ref->old_oid); -- if (!o) -- continue; -+ OBJECT_INFO_SKIP_FETCH_OBJECT)) -+ continue; -+ o = parse_object(the_repository, &ref->old_oid); -+ if (!o || o->type != OBJ_COMMIT) -+ continue; -+ -+ commit = (struct commit *)o; -+ } - - /* - * We already have it -- which may mean that we were - * in sync with the other side at some time after - * that (it is OK if we guess wrong here). - */ -- if (o->type == OBJ_COMMIT) { -- struct commit *commit = (struct commit *)o; -- if (!cutoff || cutoff < commit->date) -- cutoff = commit->date; -- } -+ if (!cutoff || cutoff < commit->date) -+ cutoff = commit->date; - } - trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); - --- -2.35.1 - diff --git a/_support/git-patches/v2.33.1.gl3/0020-fetch-skip-computing-output-width-when-not-printing-.patch b/_support/git-patches/v2.33.1.gl3/0020-fetch-skip-computing-output-width-when-not-printing-.patch deleted file mode 100644 index 2ef3c109a..000000000 --- a/_support/git-patches/v2.33.1.gl3/0020-fetch-skip-computing-output-width-when-not-printing-.patch +++ /dev/null @@ -1,84 +0,0 @@ -From b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03 Mon Sep 17 00:00:00 2001 -Message-Id: <b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03.1645001444.git.ps@pks.im> -In-Reply-To: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> -References: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> -From: Patrick Steinhardt <ps@pks.im> -Date: Thu, 10 Feb 2022 13:28:16 +0100 -Subject: [PATCH 2/2] fetch: skip computing output width when not printing - anything -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -When updating references via git-fetch(1), then by default we report to -the user which references have been changed. This output is formatted in -a nice table such that the different columns are aligned. Because the -first column contains abbreviated object IDs we thus need to iterate -over all refs which have changed and compute the minimum length for -their respective abbreviated hashes. While this effort makes sense in -most cases, it is wasteful when the user passes the `--quiet` flag: we -don't print the summary, but still compute the length. - -Skip computing the summary width when the user asked for us to be quiet. -This gives us a speedup of nearly 10% when doing a mirror-fetch in a -repository with thousands of references being updated: - - Benchmark 1: git fetch --quiet +refs/*:refs/* (HEAD~) - Time (mean ± σ): 96.078 s ± 0.508 s [User: 91.378 s, System: 10.870 s] - Range (min … max): 95.449 s … 96.760 s 5 runs - - Benchmark 2: git fetch --quiet +refs/*:refs/* (HEAD) - Time (mean ± σ): 88.214 s ± 0.192 s [User: 83.274 s, System: 10.978 s] - Range (min … max): 87.998 s … 88.446 s 5 runs - - Summary - 'git fetch --quiet +refs/*:refs/* (HEAD)' ran - 1.09 ± 0.01 times faster than 'git fetch --quiet +refs/*:refs/* (HEAD~)' - -Signed-off-by: Patrick Steinhardt <ps@pks.im> -Signed-off-by: Junio C Hamano <gitster@pobox.com> ---- - builtin/fetch.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/builtin/fetch.c b/builtin/fetch.c -index 5b3b18a72f..7ef305c66d 100644 ---- a/builtin/fetch.c -+++ b/builtin/fetch.c -@@ -1094,12 +1094,15 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, - struct ref *rm; - char *url; - int want_status; -- int summary_width = transport_summary_width(ref_map); -+ int summary_width = 0; - - rc = open_fetch_head(&fetch_head); - if (rc) - return -1; - -+ if (verbosity >= 0) -+ summary_width = transport_summary_width(ref_map); -+ - if (raw_url) - 
url = transport_anonymize_url(raw_url); - else -@@ -1345,7 +1348,6 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, - int url_len, i, result = 0; - struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map); - char *url; -- int summary_width = transport_summary_width(stale_refs); - const char *dangling_msg = dry_run - ? _(" (%s will become dangling)") - : _(" (%s has become dangling)"); -@@ -1374,6 +1376,8 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, - } - - if (verbosity >= 0) { -+ int summary_width = transport_summary_width(stale_refs); -+ - for (ref = stale_refs; ref; ref = ref->next) { - struct strbuf sb = STRBUF_INIT; - if (!shown_url) { --- -2.35.1 - |