diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-16 10:27:21 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-16 10:27:21 +0300 |
commit | cd2cd3592b5bc9f97f2850afce8208577169a112 (patch) | |
tree | deea5b3d31eaeac37e515127f3e09e553120e92e | |
parent | 40e19e808a320ce357c2ce5edfa3cf09def58477 (diff) | |
parent | 4c5b7a2a4fa9960381a214b057062ed3693b41ba (diff) |
Merge branch 'pks-git-v2.35.1.gl1-more-fetch-speedups' into 'master'
Makefile: Add more patches to speed up git-fetch(1) to v2.35.1.gl1
Closes #4104
See merge request gitlab-org/gitaly!4408
6 files changed, 616 insertions, 0 deletions
@@ -685,6 +685,17 @@ ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0032-refs-add-int ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0033-fetch-make-atomic-flag-cover-backfilling-of-tags.patch ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0034-fetch-make-atomic-flag-cover-pruning-of-refs.patch +# Some more optimizations to git-fetch(1). Most importantly, these patches +# cause us to skip reading the packed-refs file to find symbolic references, +# which provides a 13% speedup in benchmarks. These patches have been merged +# into `next` via 60aae8731c (Merge branch 'ps/fetch-mirror-optim' into next, +# 2022-03-08). +${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0035-upload-pack-look-up-want-lines-via-commit-graph.patch +${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch +${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0037-refs-add-ability-for-backends-to-special-case-readin.patch +${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0038-remote-read-symbolic-refs-via-refs_read_symbolic_ref.patch +${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_PATCHES += 0039-refs-files-backend-optimize-reading-of-symbolic-refs.patch + ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_VERSION = v2.35.1 ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: override GIT_EXTRA_VERSION = gl1 ${BUILD_DIR}/bin/gitaly-%-v2.35.1.gl1: ${DEPENDENCY_DIR}/git-v2.35.1.gl1/% | ${BUILD_DIR}/bin diff --git a/_support/git-patches/0035-upload-pack-look-up-want-lines-via-commit-graph.patch b/_support/git-patches/0035-upload-pack-look-up-want-lines-via-commit-graph.patch new file mode 100644 index 000000000..c8ada5b44 --- /dev/null +++ b/_support/git-patches/0035-upload-pack-look-up-want-lines-via-commit-graph.patch @@ -0,0 +1,97 @@ +From 4de656263aa195080495fc0a103351b9eaac8160 Mon Sep 17 00:00:00 2001 +Message-Id: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:37 +0100 +Subject: [PATCH 35/39] upload-pack: look up "want" lines via commit-graph +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +During packfile negotiation the client will send "want" and "want-ref" +lines to the server to tell it which objects it is interested in. The +server-side parses each of those and looks them up to see whether it +actually has requested objects. This lookup is performed by calling +`parse_object()` directly, which thus hits the object database. In the +general case though most of the objects the client requests will be +commits. We can thus try to look up the object via the commit-graph +opportunistically, which is much faster than doing the same via the +object database. + +Refactor parsing of both "want" and "want-ref" lines to do so. + +The following benchmark is executed in a repository with a huge number +of references. It uses cached request from git-fetch(1) as input to +git-upload-pack(1) that contains about 876,000 "want" lines: + + Benchmark 1: HEAD~ + Time (mean ± σ): 7.113 s ± 0.028 s [User: 6.900 s, System: 0.662 s] + Range (min … max): 7.072 s … 7.168 s 10 runs + + Benchmark 2: HEAD + Time (mean ± σ): 6.622 s ± 0.061 s [User: 6.452 s, System: 0.650 s] + Range (min … max): 6.535 s … 6.727 s 10 runs + + Summary + 'HEAD' ran + 1.07 ± 0.01 times faster than 'HEAD~' + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + upload-pack.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/upload-pack.c b/upload-pack.c +index 8acc98741b..3a851b3606 100644 +--- a/upload-pack.c ++++ b/upload-pack.c +@@ -1400,13 +1400,19 @@ static int parse_want(struct packet_writer *writer, const char *line, + const char *arg; + if (skip_prefix(line, "want ", &arg)) { + struct object_id oid; ++ struct commit *commit; + struct object *o; + + if (get_oid_hex(arg, &oid)) + die("git upload-pack: protocol error, " + "expected to get oid, not '%s'", line); + +- o = parse_object(the_repository, &oid); ++ commit = lookup_commit_in_graph(the_repository, &oid); ++ if (commit) ++ o = &commit->object; ++ else ++ o = parse_object(the_repository, &oid); ++ + if (!o) { + packet_writer_error(writer, + "upload-pack: not our ref %s", +@@ -1434,7 +1440,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, + if (skip_prefix(line, "want-ref ", &refname_nons)) { + struct object_id oid; + struct string_list_item *item; +- struct object *o; ++ struct object *o = NULL; + struct strbuf refname = STRBUF_INIT; + + strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons); +@@ -1448,7 +1454,15 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, + item = string_list_append(wanted_refs, refname_nons); + item->util = oiddup(&oid); + +- o = parse_object_or_die(&oid, refname_nons); ++ if (!starts_with(refname_nons, "refs/tags/")) { ++ struct commit *commit = lookup_commit_in_graph(the_repository, &oid); ++ if (commit) ++ o = &commit->object; ++ } ++ ++ if (!o) ++ o = parse_object_or_die(&oid, refname_nons); ++ + if (!(o->flags & WANTED)) { + o->flags |= WANTED; + add_object_array(o, NULL, want_obj); +-- +2.35.1 + diff --git a/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch b/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch new file mode 100644 index 000000000..686dfde8e --- /dev/null +++ b/_support/git-patches/0036-fetch-avoid-lookup-of-commits-when-not-appending-to-.patch @@ -0,0 +1,119 @@ +From 8e55634b47858f036ca773f3e1d754e5dbd4bb59 Mon Sep 17 00:00:00 2001 +Message-Id: <8e55634b47858f036ca773f3e1d754e5dbd4bb59.1647334702.git.ps@pks.im> +In-Reply-To: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +References: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:41 +0100 +Subject: [PATCH 36/39] fetch: avoid lookup of commits when not appending to + FETCH_HEAD +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When fetching from a remote repository we will by default write what has +been fetched into the special FETCH_HEAD reference. The order in which +references are written depends on whether the reference is for merge or +not, which, despite some other conditions, is also determined based on +whether the old object ID the reference is being updated from actually +exists in the repository. + +To write FETCH_HEAD we thus loop through all references thrice: once for +the references that are about to be merged, once for the references that +are not for merge, and finally for all references that are ignored. For +every iteration, we then look up the old object ID to determine whether +the referenced object exists so that we can label it as "not-for-merge" +if it doesn't exist. It goes without saying that this can be expensive +in case where we are fetching a lot of references. + +While this is hard to avoid in the case where we're writing FETCH_HEAD, +users can in fact ask us to skip this work via `--no-write-fetch-head`. +In that case, we do not care for the result of those lookups at all +because we don't have to order writes to FETCH_HEAD in the first place. + +Skip this busywork in case we're not writing to FETCH_HEAD. The +following benchmark performs a mirror-fetch in a repository with about +two million references via `git fetch --prune --no-write-fetch-head ++refs/*:refs/*`: + + Benchmark 1: HEAD~ + Time (mean ± σ): 75.388 s ± 1.942 s [User: 71.103 s, System: 8.953 s] + Range (min … max): 73.184 s … 76.845 s 3 runs + + Benchmark 2: HEAD + Time (mean ± σ): 69.486 s ± 1.016 s [User: 65.941 s, System: 8.806 s] + Range (min … max): 68.864 s … 70.659 s 3 runs + + Summary + 'HEAD' ran + 1.08 ± 0.03 times faster than 'HEAD~' + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + builtin/fetch.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/builtin/fetch.c b/builtin/fetch.c +index ec1ec91da2..3d4581d3c6 100644 +--- a/builtin/fetch.c ++++ b/builtin/fetch.c +@@ -1143,7 +1143,6 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, + want_status <= FETCH_HEAD_IGNORE; + want_status++) { + for (rm = ref_map; rm; rm = rm->next) { +- struct commit *commit = NULL; + struct ref *ref = NULL; + + if (rm->status == REF_STATUS_REJECT_SHALLOW) { +@@ -1154,21 +1153,34 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, + } + + /* +- * References in "refs/tags/" are often going to point +- * to annotated tags, which are not part of the +- * commit-graph. We thus only try to look up refs in +- * the graph which are not in that namespace to not +- * regress performance in repositories with many +- * annotated tags. ++ * When writing FETCH_HEAD we need to determine whether ++ * we already have the commit or not. If not, then the ++ * reference is not for merge and needs to be written ++ * to the reflog after other commits which we already ++ * have. We're not interested in this property though ++ * in case FETCH_HEAD is not to be updated, so we can ++ * skip the classification in that case. + */ +- if (!starts_with(rm->name, "refs/tags/")) +- commit = lookup_commit_in_graph(the_repository, &rm->old_oid); +- if (!commit) { +- commit = lookup_commit_reference_gently(the_repository, +- &rm->old_oid, +- 1); +- if (!commit) +- rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; ++ if (fetch_head->fp) { ++ struct commit *commit = NULL; ++ ++ /* ++ * References in "refs/tags/" are often going to point ++ * to annotated tags, which are not part of the ++ * commit-graph. We thus only try to look up refs in ++ * the graph which are not in that namespace to not ++ * regress performance in repositories with many ++ * annotated tags. ++ */ ++ if (!starts_with(rm->name, "refs/tags/")) ++ commit = lookup_commit_in_graph(the_repository, &rm->old_oid); ++ if (!commit) { ++ commit = lookup_commit_reference_gently(the_repository, ++ &rm->old_oid, ++ 1); ++ if (!commit) ++ rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE; ++ } + } + + if (rm->fetch_head_status != want_status) +-- +2.35.1 + diff --git a/_support/git-patches/0037-refs-add-ability-for-backends-to-special-case-readin.patch b/_support/git-patches/0037-refs-add-ability-for-backends-to-special-case-readin.patch new file mode 100644 index 000000000..695f04a1b --- /dev/null +++ b/_support/git-patches/0037-refs-add-ability-for-backends-to-special-case-readin.patch @@ -0,0 +1,166 @@ +From cd475b3b03809b1b1c664e0dca9f16f815456719 Mon Sep 17 00:00:00 2001 +Message-Id: <cd475b3b03809b1b1c664e0dca9f16f815456719.1647334702.git.ps@pks.im> +In-Reply-To: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +References: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:46 +0100 +Subject: [PATCH 37/39] refs: add ability for backends to special-case reading + of symbolic refs + +Reading of symbolic and non-symbolic references is currently treated the +same in reference backends: we always call `refs_read_raw_ref()` and +then decide based on the returned flags what type it is. This has one +downside though: symbolic references may be treated different from +normal references in a backend from normal references. The packed-refs +backend for example doesn't even know about symbolic references, and as +a result it is pointless to even ask it for one. + +There are cases where we really only care about whether a reference is +symbolic or not, but don't care about whether it exists at all or may be +a non-symbolic reference. But it is not possible to optimize for this +case right now, and as a consequence we will always first check for a +loose reference to exist, and if it doesn't, we'll query the packed-refs +backend for a known-to-not-be-symbolic reference. This is inefficient +and requires us to search all packed references even though we know to +not care for the result at all. + +Introduce a new function `refs_read_symbolic_ref()` which allows us to +fix this case. This function will only ever return symbolic references +and can thus optimize for the scenario layed out above. By default, if +the backend doesn't provide an implementation for it, we just use the +old code path and fall back to `read_raw_ref()`. But in case the backend +provides its own, more efficient implementation, we will use that one +instead. + +Note that this function is explicitly designed to not distinguish +between missing references and non-symbolic references. If it did, we'd +be forced to always search the packed-refs backend to see whether the +symbolic reference the user asked for really doesn't exist, or if it +exists as a non-symbolic reference. + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + refs.c | 17 +++++++++++++++++ + refs.h | 3 +++ + refs/debug.c | 1 + + refs/files-backend.c | 1 + + refs/packed-backend.c | 1 + + refs/refs-internal.h | 16 ++++++++++++++++ + 6 files changed, 39 insertions(+) + +diff --git a/refs.c b/refs.c +index 35d4e69687..d75a5dd53d 100644 +--- a/refs.c ++++ b/refs.c +@@ -1672,6 +1672,23 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, + type, failure_errno); + } + ++int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname, ++ struct strbuf *referent) ++{ ++ struct object_id oid; ++ int ret, failure_errno = 0; ++ unsigned int type = 0; ++ ++ if (ref_store->be->read_symbolic_ref) ++ return ref_store->be->read_symbolic_ref(ref_store, refname, referent); ++ ++ ret = refs_read_raw_ref(ref_store, refname, &oid, referent, &type, &failure_errno); ++ if (ret || !(type & REF_ISSYMREF)) ++ return -1; ++ ++ return 0; ++} ++ + const char *refs_resolve_ref_unsafe(struct ref_store *refs, + const char *refname, + int resolve_flags, +diff --git a/refs.h b/refs.h +index 1ae12c410a..23479c7ee0 100644 +--- a/refs.h ++++ b/refs.h +@@ -82,6 +82,9 @@ int read_ref_full(const char *refname, int resolve_flags, + struct object_id *oid, int *flags); + int read_ref(const char *refname, struct object_id *oid); + ++int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname, ++ struct strbuf *referent); ++ + /* + * Return 0 if a reference named refname could be created without + * conflicting with the name of an existing reference. Otherwise, +diff --git a/refs/debug.c b/refs/debug.c +index 2b0771ca53..c590d37720 100644 +--- a/refs/debug.c ++++ b/refs/debug.c +@@ -435,6 +435,7 @@ struct ref_storage_be refs_be_debug = { + + debug_ref_iterator_begin, + debug_read_raw_ref, ++ NULL, + + debug_reflog_iterator_begin, + debug_for_each_reflog_ent, +diff --git a/refs/files-backend.c b/refs/files-backend.c +index f59589d6cc..f3428a9f12 100644 +--- a/refs/files-backend.c ++++ b/refs/files-backend.c +@@ -3286,6 +3286,7 @@ struct ref_storage_be refs_be_files = { + + files_ref_iterator_begin, + files_read_raw_ref, ++ NULL, + + files_reflog_iterator_begin, + files_for_each_reflog_ent, +diff --git a/refs/packed-backend.c b/refs/packed-backend.c +index 27dd8c3922..f56e2cc635 100644 +--- a/refs/packed-backend.c ++++ b/refs/packed-backend.c +@@ -1684,6 +1684,7 @@ struct ref_storage_be refs_be_packed = { + + packed_ref_iterator_begin, + packed_read_raw_ref, ++ NULL, + + packed_reflog_iterator_begin, + packed_for_each_reflog_ent, +diff --git a/refs/refs-internal.h b/refs/refs-internal.h +index 6e15db3ca4..001ef15835 100644 +--- a/refs/refs-internal.h ++++ b/refs/refs-internal.h +@@ -649,6 +649,21 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno); + ++/* ++ * Read a symbolic reference from the specified reference store. This function ++ * is optional: if not implemented by a backend, then `read_raw_ref_fn` is used ++ * to read the symbolcic reference instead. It is intended to be implemented ++ * only in case the backend can optimize the reading of symbolic references. ++ * ++ * Return 0 on success, or -1 on failure. `referent` will be set to the target ++ * of the symbolic reference on success. This function explicitly does not ++ * distinguish between error cases and the reference not being a symbolic ++ * reference to allow backends to optimize this operation in case symbolic and ++ * non-symbolic references are treated differently. ++ */ ++typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname, ++ struct strbuf *referent); ++ + struct ref_storage_be { + struct ref_storage_be *next; + const char *name; +@@ -668,6 +683,7 @@ struct ref_storage_be { + + ref_iterator_begin_fn *iterator_begin; + read_raw_ref_fn *read_raw_ref; ++ read_symbolic_ref_fn *read_symbolic_ref; + + reflog_iterator_begin_fn *reflog_iterator_begin; + for_each_reflog_ent_fn *for_each_reflog_ent; +-- +2.35.1 + diff --git a/_support/git-patches/0038-remote-read-symbolic-refs-via-refs_read_symbolic_ref.patch b/_support/git-patches/0038-remote-read-symbolic-refs-via-refs_read_symbolic_ref.patch new file mode 100644 index 000000000..0ea1df7b0 --- /dev/null +++ b/_support/git-patches/0038-remote-read-symbolic-refs-via-refs_read_symbolic_ref.patch @@ -0,0 +1,101 @@ +From 1553f5e76c72243f61b454412b651706a869388f Mon Sep 17 00:00:00 2001 +Message-Id: <1553f5e76c72243f61b454412b651706a869388f.1647334702.git.ps@pks.im> +In-Reply-To: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +References: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:50 +0100 +Subject: [PATCH 38/39] remote: read symbolic refs via + `refs_read_symbolic_ref()` + +We have two cases in the remote code where we check whether a reference +is symbolic or not, but don't mind in case it doesn't exist or in case +it exists but is a non-symbolic reference. Convert these two callsites +to use the new `refs_read_symbolic_ref()` function, whose intent is to +implement exactly that usecase. + +No change in behaviour is expected from this change. + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + builtin/remote.c | 8 +++++--- + remote.c | 14 +++++++------- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/builtin/remote.c b/builtin/remote.c +index 299c466116..805a517192 100644 +--- a/builtin/remote.c ++++ b/builtin/remote.c +@@ -766,13 +766,15 @@ static int mv(int argc, const char **argv) + for_each_ref(read_remote_branches, &rename); + for (i = 0; i < remote_branches.nr; i++) { + struct string_list_item *item = remote_branches.items + i; +- int flag = 0; ++ struct strbuf referent = STRBUF_INIT; + +- read_ref_full(item->string, RESOLVE_REF_READING, NULL, &flag); +- if (!(flag & REF_ISSYMREF)) ++ if (refs_read_symbolic_ref(get_main_ref_store(the_repository), item->string, ++ &referent)) + continue; + if (delete_ref(NULL, item->string, NULL, REF_NO_DEREF)) + die(_("deleting '%s' failed"), item->string); ++ ++ strbuf_release(&referent); + } + for (i = 0; i < remote_branches.nr; i++) { + struct string_list_item *item = remote_branches.items + i; +diff --git a/remote.c b/remote.c +index c97c626eaa..42a4e7106e 100644 +--- a/remote.c ++++ b/remote.c +@@ -1945,13 +1945,9 @@ const char *branch_get_push(struct branch *branch, struct strbuf *err) + return branch->push_tracking_ref; + } + +-static int ignore_symref_update(const char *refname) ++static int ignore_symref_update(const char *refname, struct strbuf *scratch) + { +- int flag; +- +- if (!resolve_ref_unsafe(refname, 0, NULL, &flag)) +- return 0; /* non-existing refs are OK */ +- return (flag & REF_ISSYMREF); ++ return !refs_read_symbolic_ref(get_main_ref_store(the_repository), refname, scratch); + } + + /* +@@ -1964,6 +1960,7 @@ static int ignore_symref_update(const char *refname) + static struct ref *get_expanded_map(const struct ref *remote_refs, + const struct refspec_item *refspec) + { ++ struct strbuf scratch = STRBUF_INIT; + const struct ref *ref; + struct ref *ret = NULL; + struct ref **tail = &ret; +@@ -1971,11 +1968,13 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, + for (ref = remote_refs; ref; ref = ref->next) { + char *expn_name = NULL; + ++ strbuf_reset(&scratch); ++ + if (strchr(ref->name, '^')) + continue; /* a dereference item */ + if (match_name_with_pattern(refspec->src, ref->name, + refspec->dst, &expn_name) && +- !ignore_symref_update(expn_name)) { ++ !ignore_symref_update(expn_name, &scratch)) { + struct ref *cpy = copy_ref(ref); + + cpy->peer_ref = alloc_ref(expn_name); +@@ -1987,6 +1986,7 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, + free(expn_name); + } + ++ strbuf_release(&scratch); + return ret; + } + +-- +2.35.1 + diff --git a/_support/git-patches/0039-refs-files-backend-optimize-reading-of-symbolic-refs.patch b/_support/git-patches/0039-refs-files-backend-optimize-reading-of-symbolic-refs.patch new file mode 100644 index 000000000..465257950 --- /dev/null +++ b/_support/git-patches/0039-refs-files-backend-optimize-reading-of-symbolic-refs.patch @@ -0,0 +1,122 @@ +From 0a7b38707d5d5c8a7baeb88a85d6259cee4f4e4d Mon Sep 17 00:00:00 2001 +Message-Id: <0a7b38707d5d5c8a7baeb88a85d6259cee4f4e4d.1647334703.git.ps@pks.im> +In-Reply-To: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +References: <4de656263aa195080495fc0a103351b9eaac8160.1647334702.git.ps@pks.im> +From: Patrick Steinhardt <ps@pks.im> +Date: Tue, 1 Mar 2022 10:33:54 +0100 +Subject: [PATCH 39/39] refs/files-backend: optimize reading of symbolic refs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When reading references via `files_read_raw_ref()` we always consult +both the loose reference, and if that wasn't found, we also consult the +packed-refs file. While this makes sense to read a generic reference, it +is wasteful in the case where we only care about symbolic references: +the packed-refs backend does not support them, and thus it cannot ever +return one for us. + +Special-case reading of symbolic references for the files backend such +that we always skip asking the packed-refs backend. + +We use `refs_read_symbolic_ref()` extensively to determine whether we +need to skip updating local symbolic references during a fetch, which is +why the change results in a significant speedup when doing fetches in +repositories with huge numbers of references. The following benchmark +executes a mirror-fetch in a repository with about 2 million references +via `git fetch --prune --no-write-fetch-head +refs/*:refs/*`: + + Benchmark 1: HEAD~ + Time (mean ± σ): 68.372 s ± 2.344 s [User: 65.629 s, System: 8.786 s] + Range (min … max): 65.745 s … 70.246 s 3 runs + + Benchmark 2: HEAD + Time (mean ± σ): 60.259 s ± 0.343 s [User: 61.019 s, System: 7.245 s] + Range (min … max): 60.003 s … 60.649 s 3 runs + + Summary + 'HEAD' ran + 1.13 ± 0.04 times faster than 'HEAD~' + +Signed-off-by: Patrick Steinhardt <ps@pks.im> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + refs/files-backend.c | 34 ++++++++++++++++++++++++++++------ + 1 file changed, 28 insertions(+), 6 deletions(-) + +diff --git a/refs/files-backend.c b/refs/files-backend.c +index f3428a9f12..0457ecdb42 100644 +--- a/refs/files-backend.c ++++ b/refs/files-backend.c +@@ -338,9 +338,9 @@ static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs) + return refs->loose; + } + +-static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, +- struct object_id *oid, struct strbuf *referent, +- unsigned int *type, int *failure_errno) ++static int read_ref_internal(struct ref_store *ref_store, const char *refname, ++ struct object_id *oid, struct strbuf *referent, ++ unsigned int *type, int *failure_errno, int skip_packed_refs) + { + struct files_ref_store *refs = + files_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); +@@ -381,7 +381,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, + if (lstat(path, &st) < 0) { + int ignore_errno; + myerr = errno; +- if (myerr != ENOENT) ++ if (myerr != ENOENT || skip_packed_refs) + goto out; + if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, + referent, type, &ignore_errno)) { +@@ -425,7 +425,8 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, + * ref is supposed to be, there could still be a + * packed ref: + */ +- if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, ++ if (skip_packed_refs || ++ refs_read_raw_ref(refs->packed_ref_store, refname, oid, + referent, type, &ignore_errno)) { + myerr = EISDIR; + goto out; +@@ -470,6 +471,27 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, + return ret; + } + ++static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, ++ struct object_id *oid, struct strbuf *referent, ++ unsigned int *type, int *failure_errno) ++{ ++ return read_ref_internal(ref_store, refname, oid, referent, type, failure_errno, 0); ++} ++ ++static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refname, ++ struct strbuf *referent) ++{ ++ struct object_id oid; ++ int failure_errno, ret; ++ unsigned int type; ++ ++ ret = read_ref_internal(ref_store, refname, &oid, referent, &type, &failure_errno, 1); ++ if (ret) ++ return ret; ++ ++ return !(type & REF_ISSYMREF); ++} ++ + int parse_loose_ref_contents(const char *buf, struct object_id *oid, + struct strbuf *referent, unsigned int *type, + int *failure_errno) +@@ -3286,7 +3308,7 @@ struct ref_storage_be refs_be_files = { + + files_ref_iterator_begin, + files_read_raw_ref, +- NULL, ++ files_read_symbolic_ref, + + files_reflog_iterator_begin, + files_for_each_reflog_ent, +-- +2.35.1 + |