From 3ad8de83877f4512fc68d255e93a81bba603952e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 16 Feb 2022 09:54:37 +0100 Subject: git: Backport patches to speed up git-fetch(1) in repos with many refs We have upstreamed two patches for git-fetch(1) which speed up mirror fetches in repositories with hundreds of millions of references. The first patch makes better use of the commit-graph when computing common objects, and the second patch will cause us to skip computing the output width when fetching with the `--quiet` flag. The patches were tested with our notorious problem repo `www-gitlab-com` with speedups of about 25% when doing mirror-fetches. Backport them into our own Git version now that they have been merged via 2b331293fb (Merge branch 'ps/fetch-optim-with-commit-graph' into next, 2022-02-14). Changelog: performance --- ...ck-use-commit-graph-when-computing-cutoff.patch | 92 ++++++++++++++++++++++ ...computing-output-width-when-not-printing-.patch | 84 ++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 _support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch create mode 100644 _support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch (limited to '_support') diff --git a/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch b/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch new file mode 100644 index 000000000..855bcc193 --- /dev/null +++ b/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch @@ -0,0 +1,92 @@ +From 6fd1cc8f985ccd8b014e945a819482b267dae21f Mon Sep 17 00:00:00 2001 +Message-Id: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> +From: Patrick Steinhardt +Date: Thu, 10 Feb 2022 13:28:09 +0100 +Subject: [PATCH 1/2] fetch-pack: use commit-graph when computing cutoff +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +During packfile negotiation we iterate over all refs announced by the +remote side to check whether their IDs refer to commits already known to +us. If a commit is known to us already, then its date is a potential +cutoff point for commits we have in common with the remote side. + +There is potentially a lot of commits announced by the remote depending +on how many refs there are in the remote repository, and for every one +of them we need to search for it in our object database and, if found, +parse the corresponding object to find out whether it is a candidate for +the cutoff date. This can be sped up by trying to look up commits via +the commit-graph first, which is a lot more efficient. + +Benchmarks in a repository with about 2,1 million refs and an up-to-date +commit-graph show an almost 20% speedup when mirror-fetching: + + Benchmark 1: git fetch +refs/*:refs/* (v2.35.0) + Time (mean ± σ): 115.587 s ± 2.009 s [User: 109.874 s, System: 11.305 s] + Range (min … max): 113.584 s … 118.820 s 5 runs + + Benchmark 2: git fetch +refs/*:refs/* (HEAD) + Time (mean ± σ): 96.859 s ± 0.624 s [User: 91.948 s, System: 10.980 s] + Range (min … max): 96.180 s … 97.875 s 5 runs + + Summary + 'git fetch +refs/*:refs/* (HEAD)' ran + 1.19 ± 0.02 times faster than 'git fetch +refs/*:refs/* (v2.35.0)' + +Signed-off-by: Patrick Steinhardt +Signed-off-by: Junio C Hamano +--- + fetch-pack.c | 28 ++++++++++++++++------------ + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/fetch-pack.c b/fetch-pack.c +index dd6ec449f2..c5967e228e 100644 +--- a/fetch-pack.c ++++ b/fetch-pack.c +@@ -696,26 +696,30 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, + + trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); + for (ref = *refs; ref; ref = ref->next) { +- struct object *o; ++ struct commit *commit; + +- if (!has_object_file_with_flags(&ref->old_oid, ++ commit = lookup_commit_in_graph(the_repository, &ref->old_oid); ++ if (!commit) { ++ struct object *o; ++ ++ if (!has_object_file_with_flags(&ref->old_oid, + OBJECT_INFO_QUICK | +- OBJECT_INFO_SKIP_FETCH_OBJECT)) +- continue; +- o = parse_object(the_repository, &ref->old_oid); +- if (!o) +- continue; ++ OBJECT_INFO_SKIP_FETCH_OBJECT)) ++ continue; ++ o = parse_object(the_repository, &ref->old_oid); ++ if (!o || o->type != OBJ_COMMIT) ++ continue; ++ ++ commit = (struct commit *)o; ++ } + + /* + * We already have it -- which may mean that we were + * in sync with the other side at some time after + * that (it is OK if we guess wrong here). + */ +- if (o->type == OBJ_COMMIT) { +- struct commit *commit = (struct commit *)o; +- if (!cutoff || cutoff < commit->date) +- cutoff = commit->date; +- } ++ if (!cutoff || cutoff < commit->date) ++ cutoff = commit->date; + } + trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); + +-- +2.35.1 + diff --git a/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch b/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch new file mode 100644 index 000000000..2ef3c109a --- /dev/null +++ b/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch @@ -0,0 +1,84 @@ +From b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> +References: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im> +From: Patrick Steinhardt +Date: Thu, 10 Feb 2022 13:28:16 +0100 +Subject: [PATCH 2/2] fetch: skip computing output width when not printing + anything +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When updating references via git-fetch(1), then by default we report to +the user which references have been changed. This output is formatted in +a nice table such that the different columns are aligned. Because the +first column contains abbreviated object IDs we thus need to iterate +over all refs which have changed and compute the minimum length for +their respective abbreviated hashes. While this effort makes sense in +most cases, it is wasteful when the user passes the `--quiet` flag: we +don't print the summary, but still compute the length. + +Skip computing the summary width when the user asked for us to be quiet. +This gives us a speedup of nearly 10% when doing a mirror-fetch in a +repository with thousands of references being updated: + + Benchmark 1: git fetch --quiet +refs/*:refs/* (HEAD~) + Time (mean ± σ): 96.078 s ± 0.508 s [User: 91.378 s, System: 10.870 s] + Range (min … max): 95.449 s … 96.760 s 5 runs + + Benchmark 2: git fetch --quiet +refs/*:refs/* (HEAD) + Time (mean ± σ): 88.214 s ± 0.192 s [User: 83.274 s, System: 10.978 s] + Range (min … max): 87.998 s … 88.446 s 5 runs + + Summary + 'git fetch --quiet +refs/*:refs/* (HEAD)' ran + 1.09 ± 0.01 times faster than 'git fetch --quiet +refs/*:refs/* (HEAD~)' + +Signed-off-by: Patrick Steinhardt +Signed-off-by: Junio C Hamano +--- + builtin/fetch.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/builtin/fetch.c b/builtin/fetch.c +index 5b3b18a72f..7ef305c66d 100644 +--- a/builtin/fetch.c ++++ b/builtin/fetch.c +@@ -1094,12 +1094,15 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, + struct ref *rm; + char *url; + int want_status; +- int summary_width = transport_summary_width(ref_map); ++ int summary_width = 0; + + rc = open_fetch_head(&fetch_head); + if (rc) + return -1; + ++ if (verbosity >= 0) ++ summary_width = transport_summary_width(ref_map); ++ + if (raw_url) + url = transport_anonymize_url(raw_url); + else +@@ -1345,7 +1348,6 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, + int url_len, i, result = 0; + struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map); + char *url; +- int summary_width = transport_summary_width(stale_refs); + const char *dangling_msg = dry_run + ? _(" (%s will become dangling)") + : _(" (%s has become dangling)"); +@@ -1374,6 +1376,8 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, + } + + if (verbosity >= 0) { ++ int summary_width = transport_summary_width(stale_refs); ++ + for (ref = stale_refs; ref; ref = ref->next) { + struct strbuf sb = STRBUF_INIT; + if (!shown_url) { +-- +2.35.1 + -- cgit v1.2.3