From 3afc679b3c13d99e4f02bceb686f11d51576d3ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:11 +0000 Subject: commit: use generations in paint_down_to_common() Define compare_commits_by_gen_then_commit_date(), which uses generation numbers as a primary comparison and commit date to break ties (or as a comparison when both commits do not have computed generation numbers). Since the commit-graph file is closed under reachability, we know that all commits in the file have generation at most GENERATION_NUMBER_MAX which is less than GENERATION_NUMBER_INFINITY. This change does not affect the number of commits that are walked during the execution of paint_down_to_common(), only the order that those commits are inspected. In the case that commit dates violate topological order (i.e. a parent is "newer" than a child), the previous code could walk a commit twice: if a commit is reached with the PARENT1 bit, but later is re-visited with the PARENT2 bit, then that PARENT2 bit must be propagated to its parents. Using generation numbers avoids this extra effort, even if it is somewhat rare. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 711f674c18..4d00b0a1d6 100644 --- a/commit.c +++ b/commit.c @@ -640,6 +640,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_, return 0; } +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused) +{ + const struct commit *a = a_, *b = b_; + + /* newer commits first */ + if (a->generation < b->generation) + return 1; + else if (a->generation > b->generation) + return -1; + + /* use date as a heuristic when generations are equal */ + if (a->date < b->date) + return 1; + else if (a->date > b->date) + return -1; + return 0; +} + int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) { const struct commit *a = a_, *b = b_; @@ -789,7 +807,7 @@ static int queue_has_nonstale(struct prio_queue *queue) /* all input commits in one and twos[] must have been parsed! */ static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) { - struct prio_queue queue = { compare_commits_by_commit_date }; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; -- cgit v1.2.3 From e2838d85b6d35592ff5851d67f0232a78083ada7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:13 +0000 Subject: commit-graph: always load commit-graph information Most code paths load commits using lookup_commit() and then parse_commit(). In some cases, including some branch lookups, the commit is parsed using parse_object_buffer() which side-steps parse_commit() in favor of parse_commit_buffer(). With generation numbers in the commit-graph, we need to ensure that any commit that exists in the commit-graph file has its generation number loaded. Create new load_commit_graph_info() method to fill in the information for a commit that exists only in the commit-graph file. Call it from parse_commit_buffer() after loading the other commit information from the given buffer. Only fill this information when specified by the 'check_graph' parameter. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 4d00b0a1d6..39a3749abd 100644 --- a/commit.c +++ b/commit.c @@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) return ret; } -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph) { const char *tail = buffer; const char *bufptr = buffer; @@ -386,6 +386,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s } item->date = parse_commit_date(bufptr, tail); + if (check_graph) + load_commit_graph_info(item); + return 0; } @@ -412,7 +415,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return error("Object %s not a commit", oid_to_hex(&item->object.oid)); } - ret = parse_commit_buffer(item, buffer, size); + ret = parse_commit_buffer(item, buffer, size, 0); if (save_commit_buffer && !ret) { set_commit_buffer(item, buffer, size); return 0; -- cgit v1.2.3 From f9b8908b85247ef60001a683c281af0080e9ee77 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:17 +0000 Subject: commit: use generation numbers for in_merge_bases() The containment algorithm for 'git branch --contains' is different from that for 'git tag --contains' in that it uses is_descendant_of() instead of contains_tag_algo(). The expensive portion of the branch algorithm is computing merge bases. When a commit-graph file exists with generation numbers computed, we can avoid this merge-base calculation when the target commit has a larger generation number than the initial commits. Performance tests were run on a copy of the Linux repository where HEAD is contained in v4.13 but no earlier tag. Also, all tags were copied to branches and 'git branch --contains' was tested: Before: 60.0s After: 0.4s Rel %: -99.3% Reported-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 39a3749abd..3ecdc13356 100644 --- a/commit.c +++ b/commit.c @@ -1056,12 +1056,19 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit * { struct commit_list *bases; int ret = 0, i; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; if (parse_commit(commit)) return ret; - for (i = 0; i < nr_reference; i++) + for (i = 0; i < nr_reference; i++) { if (parse_commit(reference[i])) return ret; + if (reference[i]->generation < min_generation) + min_generation = reference[i]->generation; + } + + if (commit->generation > min_generation) + return ret; bases = paint_down_to_common(commit, nr_reference, reference); if (commit->object.flags & PARENT2) -- cgit v1.2.3 From d7c1ec3efd0092ee665085cba4e8a2bcef95143b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:19 +0000 Subject: commit: add short-circuit to paint_down_to_common() When running 'git branch --contains', the in_merge_bases_many() method calls paint_down_to_common() to discover if a specific commit is reachable from a set of branches. Commits with lower generation number are not needed to correctly answer the containment query of in_merge_bases_many(). Add a new parameter, min_generation, to paint_down_to_common() that prevents walking commits with generation number strictly less than min_generation. If 0 is given, then there is no functional change. For in_merge_bases_many(), we can pass commit->generation as the cutoff, and this saves time during 'git branch --contains' queries that would otherwise walk "around" the commit we are inspecting. For a copy of the Linux repository, where HEAD is checked out at v4.13~100, we get the following performance improvement for 'git branch --contains' over the previous commit: Before: 0.21s After: 0.13s Rel %: -38% Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 3ecdc13356..9875feec01 100644 --- a/commit.c +++ b/commit.c @@ -808,11 +808,14 @@ static int queue_has_nonstale(struct prio_queue *queue) } /* all input commits in one and twos[] must have been parsed! */ -static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) +static struct commit_list *paint_down_to_common(struct commit *one, int n, + struct commit **twos, + int min_generation) { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; + uint32_t last_gen = GENERATION_NUMBER_INFINITY; one->object.flags |= PARENT1; if (!n) { @@ -831,6 +834,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc struct commit_list *parents; int flags; + if (commit->generation > last_gen) + BUG("bad generation skip %8x > %8x at %s", + commit->generation, last_gen, + oid_to_hex(&commit->object.oid)); + last_gen = commit->generation; + + if (commit->generation < min_generation) + break; + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -879,7 +891,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co return NULL; } - list = paint_down_to_common(one, n, twos); + list = paint_down_to_common(one, n, twos, 0); while (list) { struct commit *commit = pop_commit(&list); @@ -946,7 +958,7 @@ static int remove_redundant(struct commit **array, int cnt) filled_index[filled] = j; work[filled++] = array[j]; } - common = paint_down_to_common(array[i], filled, work); + common = paint_down_to_common(array[i], filled, work, 0); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) @@ -1070,7 +1082,7 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit * if (commit->generation > min_generation) return ret; - bases = paint_down_to_common(commit, nr_reference, reference); + bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); if (commit->object.flags & PARENT2) ret = 1; clear_commit_marks(commit, all_flags); -- cgit v1.2.3 From 04bc8d1ecc30bae9589f3d77a0a89f3dca28a024 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:21 +0000 Subject: commit: use generation number in remove_redundant() The static remove_redundant() method is used to filter a list of commits by removing those that are reachable from another commit in the list. This is used to remove all possible merge- bases except a maximal, mutually independent set. To determine these commits are independent, we use a number of paint_down_to_common() walks and use the PARENT1, PARENT2 flags to determine reachability. Since we only care about reachability and not the full set of merge-bases between 'one' and 'twos', we can use the 'min_generation' parameter to short-circuit the walk. When no commit-graph exists, there is no change in behavior. For a copy of the Linux repository, we measured the following performance improvements: git merge-base v3.3 v4.5 Before: 234 ms After: 208 ms Rel %: -11% git merge-base v4.3 v4.5 Before: 102 ms After: 83 ms Rel %: -19% The experiments above were chosen to demonstrate that we are improving the filtering of the merge-base set. In the first example, more time is spent walking the history to find the set of merge bases before the remove_redundant() call. The starting commits are closer together in the second example, therefore more time is spent in remove_redundant(). The relative change in performance differs as expected. Reported-by: Jakub Narebski Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 9875feec01..1d28677dfb 100644 --- a/commit.c +++ b/commit.c @@ -949,6 +949,7 @@ static int remove_redundant(struct commit **array, int cnt) parse_commit(array[i]); for (i = 0; i < cnt; i++) { struct commit_list *common; + uint32_t min_generation = array[i]->generation; if (redundant[i]) continue; @@ -957,8 +958,12 @@ static int remove_redundant(struct commit **array, int cnt) continue; filled_index[filled] = j; work[filled++] = array[j]; + + if (array[j]->generation < min_generation) + min_generation = array[j]->generation; } - common = paint_down_to_common(array[i], filled, work, 0); + common = paint_down_to_common(array[i], filled, work, + min_generation); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) -- cgit v1.2.3