diff options
Diffstat (limited to '_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch')
-rw-r--r-- | _support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch b/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch new file mode 100644 index 000000000..686dfde8e --- /dev/null +++ b/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch @@ -0,0 +1,119 @@ +From 8e55634b47858f036ca773f3e1d754e5dbd4bb59 Mon Sep 17 00:00:00 2001 +Message-Id: <8e55634b47858f036ca773f3e1d754e5dbd4bb59.1647334702.git.ps@pks.im> +In-Reply-To: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +References: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:41 +0100 +Subject: [PATCH 36/39] fetch: avoid lookup of commits when not appending to + FETCH_HEAD +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When fetching from a remote repository we will by default write what has +been fetched into the special FETCH_HEAD reference. The order in which +references are written depends on whether the reference is for merge or +not, which, despite some other conditions, is also determined based on +whether the old object ID the reference is being updated from actually +exists in the repository. + +To write FETCH_HEAD we thus loop through all references thrice: once for +the references that are about to be merged, once for the references that +are not for merge, and finally for all references that are ignored. For +every iteration, we then look up the old object ID to determine whether +the referenced object exists so that we can label it as "not-for-merge" +if it doesn't exist. It goes without saying that this can be expensive +in case where we are fetching a lot of references. + +While this is hard to avoid in the case where we're writing FETCH_HEAD, +users can in fact ask us to skip this work via `--no-write-fetch-head`. +In that case, we do not care for the result of those lookups at all +because we don't have to order writes to FETCH_HEAD in the first place. + +Skip this busywork in case we're not writing to FETCH_HEAD. The +following benchmark performs a mirror-fetch in a repository with about +two million references via `git fetch --prune --no-write-fetch-head ++refs/*:refs/*`: + + Benchmark 1: HEAD~ + Time (mean ± σ): 75.388 s ± 1.942 s [User: 71.103 s, System: 8.953 s] + Range (min … max): 73.184 s … 76.845 s 3 runs + + Benchmark 2: HEAD + Time (mean ± σ): 69.486 s ± 1.016 s [User: 65.941 s, System: 8.806 s] + Range (min … max): 68.864 s … 70.659 s 3 runs + + Summary + 'HEAD' ran + 1.08 ± 0.03 times faster than 'HEAD~' + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + builtin/fetch.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/builtin/fetch.c b/builtin/fetch.c +index ec1ec91da2..3d4581d3c6 100644 +--- a/builtin/fetch.c ++++ b/builtin/fetch.c +@@ -1143,7 +1143,6 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, + want_status <= FETCH_HEAD_IGNORE; + want_status++) { + for (rm = ref_map; rm; rm = rm->next) { +- struct commit *commit = NULL; + struct ref *ref = NULL; + + if (rm->status == REF_STATUS_REJECT_SHALLOW) { +@@ -1154,21 +1153,34 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, + } + + /* +- * References in "refs/tags/" are often going to point +- * to annotated tags, which are not part of the +- * commit-graph. We thus only try to look up refs in +- * the graph which are not in that namespace to not +- * regress performance in repositories with many +- * annotated tags. ++ * When writing FETCH_HEAD we need to determine whether ++ * we already have the commit or not. If not, then the ++ * reference is not for merge and needs to be written ++ * to the reflog after other commits which we already ++ * have. We're not interested in this property though ++ * in case FETCH_HEAD is not to be updated, so we can ++ * skip the classification in that case. + */ +- if (!starts_with(rm->name, "refs/tags/")) +- commit = lookup_commit_in_graph(the_repository, &rm->old_oid); +- if (!commit) { +- commit = lookup_commit_reference_gently(the_repository, +- &rm->old_oid, +- 1); +- if (!commit) +- rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; ++ if (fetch_head->fp) { ++ struct commit *commit = NULL; ++ ++ /* ++ * References in "refs/tags/" are often going to point ++ * to annotated tags, which are not part of the ++ * commit-graph. We thus only try to look up refs in ++ * the graph which are not in that namespace to not ++ * regress performance in repositories with many ++ * annotated tags. ++ */ ++ if (!starts_with(rm->name, "refs/tags/")) ++ commit = lookup_commit_in_graph(the_repository, &rm->old_oid); ++ if (!commit) { ++ commit = lookup_commit_reference_gently(the_repository, ++ &rm->old_oid, ++ 1); ++ if (!commit) ++ rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; ++ } + } + + if (rm->fetch_head_status != want_status) +-- +2.35.1 + |