Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorToon Claes <toon@gitlab.com>2021-09-01 19:16:42 +0300
committerToon Claes <toon@gitlab.com>2021-09-01 19:16:42 +0300
commitcfddeaf96d5d55bd91421cebb40bb2ab240eef96 (patch)
tree822db999b51fd2f5396f5a8b165a0b6672c678a4
parentd24e5ced4c9546a7e963c910dbc5dc82969c7d63 (diff)
parent94003d7d1a67ebc185feb0f7954707ad2866a035 (diff)
Merge branch 'pks-git-speed-up-connectivity-checks' into 'master'
git: Speed up connectivity checks Closes git#92 See merge request gitlab-org/gitaly!3810
-rw-r--r--Makefile5
-rw-r--r--_support/git-patches/0002-revision-separate-walk-and-unsorted-flags.patch114
-rw-r--r--_support/git-patches/0003-connected-do-not-sort-input-revisions.patch167
-rw-r--r--_support/git-patches/0004-revision-stop-retrieving-reference-twice.patch56
-rw-r--r--_support/git-patches/0005-commit-graph-split-out-function-to-search-commit-pos.patch145
-rw-r--r--_support/git-patches/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch138
6 files changed, 625 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 66dc0a8f7..54154c18b 100644
--- a/Makefile
+++ b/Makefile
@@ -115,6 +115,11 @@ ifndef GIT_PATCHES
# first to make sure your patches meet our acceptance criteria. Patches
# must be put into `_support/git-patches`.
GIT_PATCHES += 0001-fetch-pack-speed-up-loading-of-refs-via-commit-graph.patch
+ GIT_PATCHES += 0002-revision-separate-walk-and-unsorted-flags.patch
+ GIT_PATCHES += 0003-connected-do-not-sort-input-revisions.patch
+ GIT_PATCHES += 0004-revision-stop-retrieving-reference-twice.patch
+ GIT_PATCHES += 0005-commit-graph-split-out-function-to-search-commit-pos.patch
+ GIT_PATCHES += 0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch
endif
ifndef GIT_BUILD_OPTIONS
diff --git a/_support/git-patches/0002-revision-separate-walk-and-unsorted-flags.patch b/_support/git-patches/0002-revision-separate-walk-and-unsorted-flags.patch
new file mode 100644
index 000000000..3cf006650
--- /dev/null
+++ b/_support/git-patches/0002-revision-separate-walk-and-unsorted-flags.patch
@@ -0,0 +1,114 @@
+From 29ef1f27fed21b5b7d3c996a01f1364e7e841917 Mon Sep 17 00:00:00 2001
+Message-Id: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Thu, 5 Aug 2021 13:25:24 +0200
+Subject: [PATCH 2/6] revision: separate walk and unsorted flags
+
+The `--no-walk` flag supports two modes: either it sorts the revisions
+given as input input or it doesn't. This is reflected in a single
+`no_walk` flag, which reflects one of the three states "walk", "don't
+walk but without sorting" and "don't walk but with sorting".
+
+Split up the flag into two separate bits, one indicating whether we
+should walk or not and one indicating whether the input should be sorted
+or not. This will allow us to more easily introduce a new flag
+`--unsorted-input`, which only impacts the sorting bit.
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ builtin/log.c | 2 +-
+ builtin/revert.c | 3 ++-
+ revision.c | 9 +++++----
+ revision.h | 7 ++-----
+ 4 files changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/builtin/log.c b/builtin/log.c
+index 3d7717ba5c..f75d87e8d7 100644
+--- a/builtin/log.c
++++ b/builtin/log.c
+@@ -637,7 +637,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
+ repo_init_revisions(the_repository, &rev, prefix);
+ rev.diff = 1;
+ rev.always_show_header = 1;
+- rev.no_walk = REVISION_WALK_NO_WALK_SORTED;
++ rev.no_walk = 1;
+ rev.diffopt.stat_width = -1; /* Scale to real terminal size */
+
+ memset(&opt, 0, sizeof(opt));
+diff --git a/builtin/revert.c b/builtin/revert.c
+index 237f2f18d4..2e13660e4b 100644
+--- a/builtin/revert.c
++++ b/builtin/revert.c
+@@ -191,7 +191,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
+ struct setup_revision_opt s_r_opt;
+ opts->revs = xmalloc(sizeof(*opts->revs));
+ repo_init_revisions(the_repository, opts->revs, NULL);
+- opts->revs->no_walk = REVISION_WALK_NO_WALK_UNSORTED;
++ opts->revs->no_walk = 1;
++ opts->revs->unsorted_input = 1;
+ if (argc < 2)
+ usage_with_options(usage_str, options);
+ if (!strcmp(argv[1], "-"))
+diff --git a/revision.c b/revision.c
+index cddd0542a6..86bbcd10d2 100644
+--- a/revision.c
++++ b/revision.c
+@@ -2651,16 +2651,17 @@ static int handle_revision_pseudo_opt(const char *submodule,
+ } else if (!strcmp(arg, "--not")) {
+ *flags ^= UNINTERESTING | BOTTOM;
+ } else if (!strcmp(arg, "--no-walk")) {
+- revs->no_walk = REVISION_WALK_NO_WALK_SORTED;
++ revs->no_walk = 1;
+ } else if (skip_prefix(arg, "--no-walk=", &optarg)) {
+ /*
+ * Detached form ("--no-walk X" as opposed to "--no-walk=X")
+ * not allowed, since the argument is optional.
+ */
++ revs->no_walk = 1;
+ if (!strcmp(optarg, "sorted"))
+- revs->no_walk = REVISION_WALK_NO_WALK_SORTED;
++ revs->unsorted_input = 0;
+ else if (!strcmp(optarg, "unsorted"))
+- revs->no_walk = REVISION_WALK_NO_WALK_UNSORTED;
++ revs->unsorted_input = 1;
+ else
+ return error("invalid argument to --no-walk");
+ } else if (!strcmp(arg, "--do-walk")) {
+@@ -3584,7 +3585,7 @@ int prepare_revision_walk(struct rev_info *revs)
+
+ if (!revs->reflog_info)
+ prepare_to_use_bloom_filter(revs);
+- if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
++ if (!revs->unsorted_input)
+ commit_list_sort_by_date(&revs->commits);
+ if (revs->no_walk)
+ return 0;
+diff --git a/revision.h b/revision.h
+index fbb068da9f..0c65a760ee 100644
+--- a/revision.h
++++ b/revision.h
+@@ -79,10 +79,6 @@ struct rev_cmdline_info {
+ } *rev;
+ };
+
+-#define REVISION_WALK_WALK 0
+-#define REVISION_WALK_NO_WALK_SORTED 1
+-#define REVISION_WALK_NO_WALK_UNSORTED 2
+-
+ struct oidset;
+ struct topo_walk_info;
+
+@@ -129,7 +125,8 @@ struct rev_info {
+ /* Traversal flags */
+ unsigned int dense:1,
+ prune:1,
+- no_walk:2,
++ no_walk:1,
++ unsorted_input:1,
+ remove_empty_trees:1,
+ simplify_history:1,
+ show_pulls:1,
+--
+2.33.0
+
diff --git a/_support/git-patches/0003-connected-do-not-sort-input-revisions.patch b/_support/git-patches/0003-connected-do-not-sort-input-revisions.patch
new file mode 100644
index 000000000..6f4f9d477
--- /dev/null
+++ b/_support/git-patches/0003-connected-do-not-sort-input-revisions.patch
@@ -0,0 +1,167 @@
+From f45022dc2fd692fd024f2eb41a86a66f19013d43 Mon Sep 17 00:00:00 2001
+Message-Id: <f45022dc2fd692fd024f2eb41a86a66f19013d43.1630319075.git.ps@pks.im>
+In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Mon, 9 Aug 2021 10:11:50 +0200
+Subject: [PATCH 3/6] connected: do not sort input revisions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In order to compute whether objects reachable from a set of tips are all
+connected, we do a revision walk with these tips as positive references
+and `--not --all`. `--not --all` will cause the revision walk to load
+all preexisting references as uninteresting, which can be very expensive
+in repositories with many references.
+
+Benchmarking the git-rev-list(1) command highlights that by far the most
+expensive single phase is initial sorting of the input revisions: after
+all references have been loaded, we first sort commits by author date.
+In a real-world repository with about 2.2 million references, it makes
+up about 40% of the total runtime of git-rev-list(1).
+
+Ultimately, the connectivity check shouldn't really bother about the
+order of input revisions at all. We only care whether we can actually
+walk all objects until we hit the cut-off point. So sorting the input is
+a complete waste of time.
+
+Introduce a new "--unsorted-input" flag to git-rev-list(1) which will
+cause it to not sort the commits and adjust the connectivity check to
+always pass the flag. This results in the following speedups, executed
+in a clone of gitlab-org/gitlab [1]:
+
+ Benchmark #1: git rev-list --objects --quiet --not --all --not $(cat newrev)
+ Time (mean ± σ): 7.639 s ± 0.065 s [User: 7.304 s, System: 0.335 s]
+ Range (min … max): 7.543 s … 7.742 s 10 runs
+
+ Benchmark #2: git rev-list --unsorted-input --objects --quiet --not --all --not $newrev
+ Time (mean ± σ): 4.995 s ± 0.044 s [User: 4.657 s, System: 0.337 s]
+ Range (min … max): 4.909 s … 5.048 s 10 runs
+
+ Summary
+ 'git rev-list --unsorted-input --objects --quiet --not --all --not $(cat newrev)' ran
+ 1.53 ± 0.02 times faster than 'git rev-list --objects --quiet --not --all --not $newrev'
+
+[1]: https://gitlab.com/gitlab-org/gitlab.git. Note that not all refs
+ are visible to clients.
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ Documentation/rev-list-options.txt | 8 +++++++-
+ connected.c | 1 +
+ revision.c | 9 +++++++++
+ t/t6000-rev-list-misc.sh | 31 ++++++++++++++++++++++++++++++
+ 4 files changed, 48 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
+index 24569b06d1..b7bd27e171 100644
+--- a/Documentation/rev-list-options.txt
++++ b/Documentation/rev-list-options.txt
+@@ -968,6 +968,11 @@ list of the missing objects. Object IDs are prefixed with a ``?'' character.
+ objects.
+ endif::git-rev-list[]
+
++--unsorted-input::
++ Show commits in the order they were given on the command line instead
++ of sorting them in reverse chronological order by commit time. Cannot
++ be combined with `--no-walk` or `--no-walk=sorted`.
++
+ --no-walk[=(sorted|unsorted)]::
+ Only show the given commits, but do not traverse their ancestors.
+ This has no effect if a range is specified. If the argument
+@@ -975,7 +980,8 @@ endif::git-rev-list[]
+ given on the command line. Otherwise (if `sorted` or no argument
+ was given), the commits are shown in reverse chronological order
+ by commit time.
+- Cannot be combined with `--graph`.
++ Cannot be combined with `--graph`. Cannot be combined with
++ `--unsorted-input` if `sorted` or no argument was given.
+
+ --do-walk::
+ Overrides a previous `--no-walk`.
+diff --git a/connected.c b/connected.c
+index b18299fdf0..b5f9523a5f 100644
+--- a/connected.c
++++ b/connected.c
+@@ -106,6 +106,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
+ if (opt->progress)
+ strvec_pushf(&rev_list.args, "--progress=%s",
+ _("Checking connectivity"));
++ strvec_push(&rev_list.args, "--unsorted-input");
+
+ rev_list.git_cmd = 1;
+ rev_list.env = opt->env;
+diff --git a/revision.c b/revision.c
+index 86bbcd10d2..47541407d2 100644
+--- a/revision.c
++++ b/revision.c
+@@ -2256,6 +2256,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
+ } else if (!strcmp(arg, "--author-date-order")) {
+ revs->sort_order = REV_SORT_BY_AUTHOR_DATE;
+ revs->topo_order = 1;
++ } else if (!strcmp(arg, "--unsorted-input")) {
++ if (revs->no_walk)
++ die(_("--unsorted-input is incompatible with --no-walk"));
++ revs->unsorted_input = 1;
+ } else if (!strcmp(arg, "--early-output")) {
+ revs->early_output = 100;
+ revs->topo_order = 1;
+@@ -2651,8 +2655,13 @@ static int handle_revision_pseudo_opt(const char *submodule,
+ } else if (!strcmp(arg, "--not")) {
+ *flags ^= UNINTERESTING | BOTTOM;
+ } else if (!strcmp(arg, "--no-walk")) {
++ if (!revs->no_walk && revs->unsorted_input)
++ die(_("--no-walk is incompatible with --unsorted-input"));
+ revs->no_walk = 1;
+ } else if (skip_prefix(arg, "--no-walk=", &optarg)) {
++ if (!revs->no_walk && revs->unsorted_input)
++ die(_("--no-walk is incompatible with --unsorted-input"));
++
+ /*
+ * Detached form ("--no-walk X" as opposed to "--no-walk=X")
+ * not allowed, since the argument is optional.
+diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh
+index 12def7bcbf..ef849e5bc8 100755
+--- a/t/t6000-rev-list-misc.sh
++++ b/t/t6000-rev-list-misc.sh
+@@ -169,4 +169,35 @@ test_expect_success 'rev-list --count --objects' '
+ test_line_count = $count actual
+ '
+
++test_expect_success 'rev-list --unsorted-input results in different sorting' '
++ git rev-list --unsorted-input HEAD HEAD~ >first &&
++ git rev-list --unsorted-input HEAD~ HEAD >second &&
++ ! test_cmp first second &&
++ sort first >first.sorted &&
++ sort second >second.sorted &&
++ test_cmp first.sorted second.sorted
++'
++
++test_expect_success 'rev-list --unsorted-input incompatible with --no-walk' '
++ cat >expect <<-EOF &&
++ fatal: --no-walk is incompatible with --unsorted-input
++ EOF
++ test_must_fail git rev-list --unsorted-input --no-walk HEAD 2>error &&
++ test_cmp expect error &&
++ test_must_fail git rev-list --unsorted-input --no-walk=sorted HEAD 2>error &&
++ test_cmp expect error &&
++ test_must_fail git rev-list --unsorted-input --no-walk=unsorted HEAD 2>error &&
++ test_cmp expect error &&
++
++ cat >expect <<-EOF &&
++ fatal: --unsorted-input is incompatible with --no-walk
++ EOF
++ test_must_fail git rev-list --no-walk --unsorted-input HEAD 2>error &&
++ test_cmp expect error &&
++ test_must_fail git rev-list --no-walk=sorted --unsorted-input HEAD 2>error &&
++ test_cmp expect error &&
++ test_must_fail git rev-list --no-walk=unsorted --unsorted-input HEAD 2>error &&
++ test_cmp expect error
++'
++
+ test_done
+--
+2.33.0
+
diff --git a/_support/git-patches/0004-revision-stop-retrieving-reference-twice.patch b/_support/git-patches/0004-revision-stop-retrieving-reference-twice.patch
new file mode 100644
index 000000000..aa00f75f1
--- /dev/null
+++ b/_support/git-patches/0004-revision-stop-retrieving-reference-twice.patch
@@ -0,0 +1,56 @@
+From bf9c0cbddbcd730e4312ba5e19f8b8a2edd65bb3 Mon Sep 17 00:00:00 2001
+Message-Id: <bf9c0cbddbcd730e4312ba5e19f8b8a2edd65bb3.1630319075.git.ps@pks.im>
+In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Mon, 9 Aug 2021 10:11:54 +0200
+Subject: [PATCH 4/6] revision: stop retrieving reference twice
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When queueing up references for the revision walk, `handle_one_ref()`
+will resolve the reference's object ID via `get_reference()` and then
+queue the ID as pending object via `add_pending_oid()`. But given that
+`add_pending_oid()` is only a thin wrapper around `add_pending_object()`
+which fist calls `get_reference()`, we effectively resolve the reference
+twice and thus duplicate some of the work.
+
+Fix the issue by instead calling `add_pending_object()` directly, which
+takes the already-resolved object as input. In a repository with lots of
+refs, this translates into a near 10% speedup:
+
+ Benchmark #1: HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev
+ Time (mean ± σ): 5.015 s ± 0.038 s [User: 4.698 s, System: 0.316 s]
+ Range (min … max): 4.970 s … 5.089 s 10 runs
+
+ Benchmark #2: HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev
+ Time (mean ± σ): 4.606 s ± 0.029 s [User: 4.260 s, System: 0.345 s]
+ Range (min … max): 4.565 s … 4.657 s 10 runs
+
+ Summary
+ 'HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' ran
+ 1.09 ± 0.01 times faster than 'HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev'
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ revision.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/revision.c b/revision.c
+index 47541407d2..80a59896b9 100644
+--- a/revision.c
++++ b/revision.c
+@@ -1534,7 +1534,7 @@ static int handle_one_ref(const char *path, const struct object_id *oid,
+
+ object = get_reference(cb->all_revs, path, oid, cb->all_flags);
+ add_rev_cmdline(cb->all_revs, object, path, REV_CMD_REF, cb->all_flags);
+- add_pending_oid(cb->all_revs, path, oid, cb->all_flags);
++ add_pending_object(cb->all_revs, object, path);
+ return 0;
+ }
+
+--
+2.33.0
+
diff --git a/_support/git-patches/0005-commit-graph-split-out-function-to-search-commit-pos.patch b/_support/git-patches/0005-commit-graph-split-out-function-to-search-commit-pos.patch
new file mode 100644
index 000000000..0964f5b98
--- /dev/null
+++ b/_support/git-patches/0005-commit-graph-split-out-function-to-search-commit-pos.patch
@@ -0,0 +1,145 @@
+From 809ea28f809e52d3204b597637b2f5e072c140f8 Mon Sep 17 00:00:00 2001
+Message-Id: <809ea28f809e52d3204b597637b2f5e072c140f8.1630319075.git.ps@pks.im>
+In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Mon, 9 Aug 2021 10:11:59 +0200
+Subject: [PATCH 5/6] commit-graph: split out function to search commit
+ position
+
+The function `find_commit_in_graph()` assumes that the caller has passed
+an object which was already determined to be a commit given that it will
+access the commit's graph position, which is stored in a commit slab. In
+a subsequent patch, we want to search for an object ID though without
+knowing whether it is a commit or not, which is not currently possible.
+
+Split out the logic to search the commit graph for a given object ID to
+prepare for this change. This commit also renames the function to
+`find_commit_pos_in_graph()`, which more accurately reflects what this
+function does. Furthermore, in order to allow for the searched object ID
+to be const, we need to adjust `bsearch_graph()`'s signature to accept a
+constant object ID as input, too.
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ commit-graph.c | 55 +++++++++++++++++++++++++++-----------------------
+ 1 file changed, 30 insertions(+), 25 deletions(-)
+
+diff --git a/commit-graph.c b/commit-graph.c
+index 3860a0d847..8c4c7262c8 100644
+--- a/commit-graph.c
++++ b/commit-graph.c
+@@ -723,7 +723,7 @@ void close_commit_graph(struct raw_object_store *o)
+ o->commit_graph = NULL;
+ }
+
+-static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
++static int bsearch_graph(struct commit_graph *g, const struct object_id *oid, uint32_t *pos)
+ {
+ return bsearch_hash(oid->hash, g->chunk_oid_fanout,
+ g->chunk_oid_lookup, g->hash_len, pos);
+@@ -864,25 +864,30 @@ static int fill_commit_in_graph(struct repository *r,
+ return 1;
+ }
+
+-static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
++static int search_commit_pos_in_graph(const struct object_id *id, struct commit_graph *g, uint32_t *pos)
++{
++ struct commit_graph *cur_g = g;
++ uint32_t lex_index;
++
++ while (cur_g && !bsearch_graph(cur_g, id, &lex_index))
++ cur_g = cur_g->base_graph;
++
++ if (cur_g) {
++ *pos = lex_index + cur_g->num_commits_in_base;
++ return 1;
++ }
++
++ return 0;
++}
++
++static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
+ {
+ uint32_t graph_pos = commit_graph_position(item);
+ if (graph_pos != COMMIT_NOT_FROM_GRAPH) {
+ *pos = graph_pos;
+ return 1;
+ } else {
+- struct commit_graph *cur_g = g;
+- uint32_t lex_index;
+-
+- while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index))
+- cur_g = cur_g->base_graph;
+-
+- if (cur_g) {
+- *pos = lex_index + cur_g->num_commits_in_base;
+- return 1;
+- }
+-
+- return 0;
++ return search_commit_pos_in_graph(&item->object.oid, g, pos);
+ }
+ }
+
+@@ -895,7 +900,7 @@ static int parse_commit_in_graph_one(struct repository *r,
+ if (item->object.parsed)
+ return 1;
+
+- if (find_commit_in_graph(item, g, &pos))
++ if (find_commit_pos_in_graph(item, g, &pos))
+ return fill_commit_in_graph(r, item, g, pos);
+
+ return 0;
+@@ -921,7 +926,7 @@ void load_commit_graph_info(struct repository *r, struct commit *item)
+ uint32_t pos;
+ if (!prepare_commit_graph(r))
+ return;
+- if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
++ if (find_commit_pos_in_graph(item, r->objects->commit_graph, &pos))
+ fill_commit_graph_info(item, r->objects->commit_graph, pos);
+ }
+
+@@ -1091,9 +1096,9 @@ static int write_graph_chunk_data(struct hashfile *f,
+ edge_value += ctx->new_num_commits_in_base;
+ else if (ctx->new_base_graph) {
+ uint32_t pos;
+- if (find_commit_in_graph(parent->item,
+- ctx->new_base_graph,
+- &pos))
++ if (find_commit_pos_in_graph(parent->item,
++ ctx->new_base_graph,
++ &pos))
+ edge_value = pos;
+ }
+
+@@ -1122,9 +1127,9 @@ static int write_graph_chunk_data(struct hashfile *f,
+ edge_value += ctx->new_num_commits_in_base;
+ else if (ctx->new_base_graph) {
+ uint32_t pos;
+- if (find_commit_in_graph(parent->item,
+- ctx->new_base_graph,
+- &pos))
++ if (find_commit_pos_in_graph(parent->item,
++ ctx->new_base_graph,
++ &pos))
+ edge_value = pos;
+ }
+
+@@ -1235,9 +1240,9 @@ static int write_graph_chunk_extra_edges(struct hashfile *f,
+ edge_value += ctx->new_num_commits_in_base;
+ else if (ctx->new_base_graph) {
+ uint32_t pos;
+- if (find_commit_in_graph(parent->item,
+- ctx->new_base_graph,
+- &pos))
++ if (find_commit_pos_in_graph(parent->item,
++ ctx->new_base_graph,
++ &pos))
+ edge_value = pos;
+ }
+
+--
+2.33.0
+
diff --git a/_support/git-patches/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch b/_support/git-patches/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch
new file mode 100644
index 000000000..c7e575811
--- /dev/null
+++ b/_support/git-patches/0006-revision-avoid-hitting-packfiles-when-commits-are-in.patch
@@ -0,0 +1,138 @@
+From f559d6d45e7e58ae1f922213948723de77ea77bd Mon Sep 17 00:00:00 2001
+Message-Id: <f559d6d45e7e58ae1f922213948723de77ea77bd.1630319075.git.ps@pks.im>
+In-Reply-To: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+References: <29ef1f27fed21b5b7d3c996a01f1364e7e841917.1630319075.git.ps@pks.im>
+From: Patrick Steinhardt <ps@pks.im>
+Date: Mon, 9 Aug 2021 10:12:03 +0200
+Subject: [PATCH 6/6] revision: avoid hitting packfiles when commits are in
+ commit-graph
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When queueing references in git-rev-list(1), we try to optimize parsing
+of commits via the commit-graph. To do so, we first look up the object's
+type, and if it is a commit we call `repo_parse_commit()` instead of
+`parse_object()`. This is quite inefficient though given that we're
+always uncompressing the object header in order to determine the type.
+Instead, we can opportunistically search the commit-graph for the object
+ID: in case it's found, we know it's a commit and can directly fill in
+the commit object without having to uncompress the object header.
+
+Expose a new function `lookup_commit_in_graph()`, which tries to find a
+commit in the commit-graph by ID, and convert `get_reference()` to use
+this function. This provides a big performance win in cases where we
+load references in a repository with lots of references pointing to
+commits. The following has been executed in a real-world repository with
+about 2.2 million refs:
+
+ Benchmark #1: HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev
+ Time (mean ± σ): 4.458 s ± 0.044 s [User: 4.115 s, System: 0.342 s]
+ Range (min … max): 4.409 s … 4.534 s 10 runs
+
+ Benchmark #2: HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev
+ Time (mean ± σ): 3.089 s ± 0.015 s [User: 2.768 s, System: 0.321 s]
+ Range (min … max): 3.061 s … 3.105 s 10 runs
+
+ Summary
+ 'HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' ran
+ 1.44 ± 0.02 times faster than 'HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev'
+
+Signed-off-by: Patrick Steinhardt <ps@pks.im>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ commit-graph.c | 24 ++++++++++++++++++++++++
+ commit-graph.h | 8 ++++++++
+ revision.c | 18 ++++++++----------
+ 3 files changed, 40 insertions(+), 10 deletions(-)
+
+diff --git a/commit-graph.c b/commit-graph.c
+index 8c4c7262c8..00614acd65 100644
+--- a/commit-graph.c
++++ b/commit-graph.c
+@@ -891,6 +891,30 @@ static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g,
+ }
+ }
+
++struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id)
++{
++ struct commit *commit;
++ uint32_t pos;
++
++ if (!repo->objects->commit_graph)
++ return NULL;
++ if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos))
++ return NULL;
++ if (!repo_has_object_file(repo, id))
++ return NULL;
++
++ commit = lookup_commit(repo, id);
++ if (!commit)
++ return NULL;
++ if (commit->object.parsed)
++ return commit;
++
++ if (!fill_commit_in_graph(repo, commit, repo->objects->commit_graph, pos))
++ return NULL;
++
++ return commit;
++}
++
+ static int parse_commit_in_graph_one(struct repository *r,
+ struct commit_graph *g,
+ struct commit *item)
+diff --git a/commit-graph.h b/commit-graph.h
+index 96c24fb577..04a94e1830 100644
+--- a/commit-graph.h
++++ b/commit-graph.h
+@@ -40,6 +40,14 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
+ */
+ int parse_commit_in_graph(struct repository *r, struct commit *item);
+
++/*
++ * Look up the given commit ID in the commit-graph. This will only return a
++ * commit if the ID exists both in the graph and in the object database such
++ * that we don't return commits whose object has been pruned. Otherwise, this
++ * function returns `NULL`.
++ */
++struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id);
++
+ /*
+ * It is possible that we loaded commit contents from the commit buffer,
+ * but we also want to ensure the commit-graph content is correctly
+diff --git a/revision.c b/revision.c
+index 80a59896b9..0dabb5a0bc 100644
+--- a/revision.c
++++ b/revision.c
+@@ -360,20 +360,18 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
+ unsigned int flags)
+ {
+ struct object *object;
++ struct commit *commit;
+
+ /*
+- * If the repository has commit graphs, repo_parse_commit() avoids
+- * reading the object buffer, so use it whenever possible.
++ * If the repository has commit graphs, we try to opportunistically
++ * look up the object ID in those graphs. Like this, we can avoid
++ * parsing commit data from disk.
+ */
+- if (oid_object_info(revs->repo, oid, NULL) == OBJ_COMMIT) {
+- struct commit *c = lookup_commit(revs->repo, oid);
+- if (!repo_parse_commit(revs->repo, c))
+- object = (struct object *) c;
+- else
+- object = NULL;
+- } else {
++ commit = lookup_commit_in_graph(revs->repo, oid);
++ if (commit)
++ object = &commit->object;
++ else
+ object = parse_object(revs->repo, oid);
+- }
+
+ if (!object) {
+ if (revs->ignore_missing)
+--
+2.33.0
+