Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Cai <jcai@gitlab.com> 2022-02-17 07:58:20 +0300
committer John Cai <jcai@gitlab.com> 2022-02-17 07:58:20 +0300
commit 4ac6a5906d27098bf0f6fb9e19c190ea9722c70a (patch)
tree 8d77504c28a3bf10b386af1e162b4ecdde43712d
parent 5d557a52c40e2641d6ea1b44b60a897d0e9401e7 (diff)
parent 3ad8de83877f4512fc68d255e93a81bba603952e (diff)
Merge branch 'pks-git-fetch-optim-with-commit-graph' into 'master'
git: Backport patches to speed up git-fetch(1) in repos with many refs

Closes #4050

See merge request gitlab-org/gitaly!4355
-rw-r--r-- Makefile 9
-rw-r--r-- _support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch 92
-rw-r--r-- _support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch 84
3 files changed, 184 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 56eb19ca2..6dda56e5e 100644
--- a/Makefile
+++ b/Makefile
@@ -171,13 +171,20 @@ ifdef GIT_APPLY_DEFAULT_PATCHES
# 2021-12-27).
GIT_PATCHES += 0018-upload-pack.c-increase-output-buffer-size.patch
+ # Speed up fetches by making better use of the commit-graph and by not
+ # computing the output-width if not requested. Merged into next via
+ # 2b331293fb (Merge branch 'ps/fetch-optim-with-commit-graph' into next,
+ # 2022-02-14).
+ GIT_PATCHES += 0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch
+ GIT_PATCHES += 0020-fetch-skip-computing-output-width-when-not-printing-.patch
+
# This extra version has two intentions: first, it allows us to detect
# capabilities of the command at runtime. Second, it helps admins to
# discover which version is currently in use. As such, this version must be
# incremented whenever a new patch is added above. When no patches exist,
# then this should be undefined. Otherwise, it must be set to at least
# `gl1` given that `0` is the "default" GitLab patch level.
- GIT_EXTRA_VERSION := gl2
+ GIT_EXTRA_VERSION := gl3
endif
ifeq ($(origin GIT_BUILD_OPTIONS),undefined)
diff --git a/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch b/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch
new file mode 100644
index 000000000..855bcc193
--- /dev/null
+++ b/_support/git-patches/0019-fetch-pack-use-commit-graph-when-computing-cutoff.patch
@@ -0,0 +1,92 @@
+From 6fd1cc8f985ccd8b014e945a819482b267dae21f Mon Sep 17 00:00:00 2001
+Message-Id: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Thu, 10 Feb 2022 13:28:09 +0100
+Subject: [PATCH 1/2] fetch-pack: use commit-graph when computing cutoff
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+During packfile negotiation we iterate over all refs announced by the
+remote side to check whether their IDs refer to commits already known to
+us. If a commit is known to us already, then its date is a potential
+cutoff point for commits we have in common with the remote side.
+
+There is potentially a lot of commits announced by the remote depending
+on how many refs there are in the remote repository, and for every one
+of them we need to search for it in our object database and, if found,
+parse the corresponding object to find out whether it is a candidate for
+the cutoff date. This can be sped up by trying to look up commits via
+the commit-graph first, which is a lot more efficient.
+
+Benchmarks in a repository with about 2,1 million refs and an up-to-date
+commit-graph show an almost 20% speedup when mirror-fetching:
+
+ Benchmark 1: git fetch +refs/*:refs/* (v2.35.0)
+ Time (mean ± σ): 115.587 s ± 2.009 s [User: 109.874 s, System: 11.305 s]
+ Range (min … max): 113.584 s … 118.820 s 5 runs
+
+ Benchmark 2: git fetch +refs/*:refs/* (HEAD)
+ Time (mean ± σ): 96.859 s ± 0.624 s [User: 91.948 s, System: 10.980 s]
+ Range (min … max): 96.180 s … 97.875 s 5 runs
+
+ Summary
+ 'git fetch +refs/*:refs/* (HEAD)' ran
+ 1.19 ± 0.02 times faster than 'git fetch +refs/*:refs/* (v2.35.0)'
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ fetch-pack.c | 28 ++++++++++++++++------------
+ 1 file changed, 16 insertions(+), 12 deletions(-)
+
+diff --git a/fetch-pack.c b/fetch-pack.c
+index dd6ec449f2..c5967e228e 100644
+--- a/fetch-pack.c
++++ b/fetch-pack.c
+@@ -696,26 +696,30 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
+
+ trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL);
+ for (ref = *refs; ref; ref = ref->next) {
+- struct object *o;
++ struct commit *commit;
+
+- if (!has_object_file_with_flags(&ref->old_oid,
++ commit = lookup_commit_in_graph(the_repository, &ref->old_oid);
++ if (!commit) {
++ struct object *o;
++
++ if (!has_object_file_with_flags(&ref->old_oid,
+ OBJECT_INFO_QUICK |
+- OBJECT_INFO_SKIP_FETCH_OBJECT))
+- continue;
+- o = parse_object(the_repository, &ref->old_oid);
+- if (!o)
+- continue;
++ OBJECT_INFO_SKIP_FETCH_OBJECT))
++ continue;
++ o = parse_object(the_repository, &ref->old_oid);
++ if (!o || o->type != OBJ_COMMIT)
++ continue;
++
++ commit = (struct commit *)o;
++ }
+
+ /*
+ * We already have it -- which may mean that we were
+ * in sync with the other side at some time after
+ * that (it is OK if we guess wrong here).
+ */
+- if (o->type == OBJ_COMMIT) {
+- struct commit *commit = (struct commit *)o;
+- if (!cutoff || cutoff < commit->date)
+- cutoff = commit->date;
+- }
++ if (!cutoff || cutoff < commit->date)
++ cutoff = commit->date;
+ }
+ trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL);
+
+--
+2.35.1
+
diff --git a/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch b/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch
new file mode 100644
index 000000000..2ef3c109a
--- /dev/null
+++ b/_support/git-patches/0020-fetch-skip-computing-output-width-when-not-printing-.patch
@@ -0,0 +1,84 @@
+From b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03 Mon Sep 17 00:00:00 2001
+Message-Id: <b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03.1645001444.git.ps@pks.im>
+In-Reply-To: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im>
+References: <6fd1cc8f985ccd8b014e945a819482b267dae21f.1645001444.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Thu, 10 Feb 2022 13:28:16 +0100
+Subject: [PATCH 2/2] fetch: skip computing output width when not printing
+ anything
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When updating references via git-fetch(1), then by default we report to
+the user which references have been changed. This output is formatted in
+a nice table such that the different columns are aligned. Because the
+first column contains abbreviated object IDs we thus need to iterate
+over all refs which have changed and compute the minimum length for
+their respective abbreviated hashes. While this effort makes sense in
+most cases, it is wasteful when the user passes the `--quiet` flag: we
+don't print the summary, but still compute the length.
+
+Skip computing the summary width when the user asked for us to be quiet.
+This gives us a speedup of nearly 10% when doing a mirror-fetch in a
+repository with thousands of references being updated:
+
+ Benchmark 1: git fetch --quiet +refs/*:refs/* (HEAD~)
+ Time (mean ± σ): 96.078 s ± 0.508 s [User: 91.378 s, System: 10.870 s]
+ Range (min … max): 95.449 s … 96.760 s 5 runs
+
+ Benchmark 2: git fetch --quiet +refs/*:refs/* (HEAD)
+ Time (mean ± σ): 88.214 s ± 0.192 s [User: 83.274 s, System: 10.978 s]
+ Range (min … max): 87.998 s … 88.446 s 5 runs
+
+ Summary
+ 'git fetch --quiet +refs/*:refs/* (HEAD)' ran
+ 1.09 ± 0.01 times faster than 'git fetch --quiet +refs/*:refs/* (HEAD~)'
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ builtin/fetch.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/builtin/fetch.c b/builtin/fetch.c
+index 5b3b18a72f..7ef305c66d 100644
+--- a/builtin/fetch.c
++++ b/builtin/fetch.c
+@@ -1094,12 +1094,15 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
+ struct ref *rm;
+ char *url;
+ int want_status;
+- int summary_width = transport_summary_width(ref_map);
++ int summary_width = 0;
+
+ rc = open_fetch_head(&fetch_head);
+ if (rc)
+ return -1;
+
++ if (verbosity >= 0)
++ summary_width = transport_summary_width(ref_map);
++
+ if (raw_url)
+ url = transport_anonymize_url(raw_url);
+ else
+@@ -1345,7 +1348,6 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map,
+ int url_len, i, result = 0;
+ struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map);
+ char *url;
+- int summary_width = transport_summary_width(stale_refs);
+ const char *dangling_msg = dry_run
+ ? _(" (%s will become dangling)")
+ : _(" (%s has become dangling)");
+@@ -1374,6 +1376,8 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map,
+ }
+
+ if (verbosity >= 0) {
++ int summary_width = transport_summary_width(stale_refs);
++
+ for (ref = stale_refs; ref; ref = ref->next) {
+ struct strbuf sb = STRBUF_INIT;
+ if (!shown_url) {
+--
+2.35.1
+