From 368d19b0b7fa780d84918cf4ed4a31a410ed865d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Mar 2023 11:26:49 +0000 Subject: commit-graph: refactor compute_topological_levels() This patch extracts the common code used to compute topological levels and corrected committer dates into a common routine, compute_reachable_generation_numbers(). For ease of reading, it only modifies compute_topological_levels() to use this new routine, leaving compute_generation_numbers() to be modified in the next change. This new routine dispatches to call the necessary functions to get and set the generation number for a given commit through a vtable (the compute_generation_info struct). Computing the generation number itself is done in compute_generation_from_max(), which dispatches its implementation based on the generation version requested, or issuing a BUG() for unrecognized generation versions. This does not use a vtable because the logic depends only on the generation number version, not where the data is being loaded from or being stored to. This is a subtle point that will make more sense in a future change that modifies the in-memory generation values instead of just preparing values for writing to a commit-graph file. This change looks like it adds a lot of new code. However, two upcoming changes will be quite small due to the work being done in this change. Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 106 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 23 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index c11b59f28b..4356c8c1f4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1446,24 +1446,52 @@ static void close_reachable(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -static void compute_topological_levels(struct write_commit_graph_context *ctx) +struct compute_generation_info { + struct repository *r; + struct packed_commit_list *commits; + struct progress *progress; + int progress_cnt; + + timestamp_t (*get_generation)(struct commit *c, void *data); + void (*set_generation)(struct commit *c, timestamp_t gen, void *data); + void *data; +}; + +static timestamp_t compute_generation_from_max(struct commit *c, + timestamp_t max_gen, + int generation_version) +{ + switch (generation_version) { + case 1: /* topological levels */ + if (max_gen > GENERATION_NUMBER_V1_MAX - 1) + max_gen = GENERATION_NUMBER_V1_MAX - 1; + return max_gen + 1; + + case 2: /* corrected commit date */ + if (c->date && c->date > max_gen) + max_gen = c->date - 1; + return max_gen + 1; + + default: + BUG("attempting unimplemented version"); + } +} + +static void compute_reachable_generation_numbers( + struct compute_generation_info *info, + int generation_version) { int i; struct commit_list *list = NULL; - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Computing commit graph topological levels"), - ctx->commits.nr); - for (i = 0; i < ctx->commits.nr; i++) { - struct commit *c = ctx->commits.list[i]; - uint32_t level; - - repo_parse_commit(ctx->r, c); - level = *topo_level_slab_at(ctx->topo_levels, c); + for (i = 0; i < info->commits->nr; i++) { + struct commit *c = info->commits->list[i]; + timestamp_t gen; + repo_parse_commit(info->r, c); + gen = info->get_generation(c, info->data); + display_progress(info->progress, info->progress_cnt + 1); - display_progress(ctx->progress, i + 1); - if (level != GENERATION_NUMBER_ZERO) + if (gen != GENERATION_NUMBER_ZERO && gen != GENERATION_NUMBER_INFINITY) continue; commit_list_insert(c, &list); @@ -1471,31 +1499,63 @@ static void compute_topological_levels(struct write_commit_graph_context *ctx) struct commit *current = list->item; struct commit_list *parent; int all_parents_computed = 1; - uint32_t max_level = 0; + uint32_t max_gen = 0; for (parent = current->parents; parent; parent = parent->next) { - repo_parse_commit(ctx->r, parent->item); - level = *topo_level_slab_at(ctx->topo_levels, parent->item); + repo_parse_commit(info->r, parent->item); + gen = info->get_generation(parent->item, info->data); - if (level == GENERATION_NUMBER_ZERO) { + if (gen == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); break; } - if (level > max_level) - max_level = level; + if (gen > max_gen) + max_gen = gen; } if (all_parents_computed) { pop_commit(&list); - - if (max_level > GENERATION_NUMBER_V1_MAX - 1) - max_level = GENERATION_NUMBER_V1_MAX - 1; - *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1; + gen = compute_generation_from_max( + current, max_gen, + generation_version); + info->set_generation(current, gen, info->data); } } } +} + +static timestamp_t get_topo_level(struct commit *c, void *data) +{ + struct write_commit_graph_context *ctx = data; + return *topo_level_slab_at(ctx->topo_levels, c); +} + +static void set_topo_level(struct commit *c, timestamp_t t, void *data) +{ + struct write_commit_graph_context *ctx = data; + *topo_level_slab_at(ctx->topo_levels, c) = (uint32_t)t; +} + +static void compute_topological_levels(struct write_commit_graph_context *ctx) +{ + struct compute_generation_info info = { + .r = ctx->r, + .commits = &ctx->commits, + .get_generation = get_topo_level, + .set_generation = set_topo_level, + .data = ctx, + }; + + if (ctx->report_progress) + info.progress = ctx->progress + = start_delayed_progress( + _("Computing commit graph topological levels"), + ctx->commits.nr); + + compute_reachable_generation_numbers(&info, 1); + stop_progress(&ctx->progress); } -- cgit v1.2.3 From 80c928d947c257ef4d9f13c358117d0c1914f71f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Mar 2023 11:26:50 +0000 Subject: commit-graph: simplify compute_generation_numbers() The previous change introduced the generic algorithm compute_reachable_generation_numbers() and used it as the core functionality of compute_topological_levels(). Now, use it as the core functionality of compute_generation_numbers(). The main difference here is that we use generation version 2, which is used in to toggle the logic in compute_generation_from_max() for computing the corrected commit date based on the corrected commit dates of the parent commits (and the commit date of the current commit). It also uses different methods for (get|set)_generation in the vtable in order to store and access the value in the correct places. Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 64 +++++++++++++++++++--------------------------------------- 1 file changed, 21 insertions(+), 43 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 4356c8c1f4..d1c98681e8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1559,13 +1559,31 @@ static void compute_topological_levels(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static timestamp_t get_generation_from_graph_data(struct commit *c, void *data) +{ + return commit_graph_data_at(c)->generation; +} + +static void set_generation_v2(struct commit *c, timestamp_t t, void *data) +{ + struct commit_graph_data *g = commit_graph_data_at(c); + g->generation = (uint32_t)t; +} + static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; - struct commit_list *list = NULL; + struct compute_generation_info info = { + .r = ctx->r, + .commits = &ctx->commits, + .get_generation = get_generation_from_graph_data, + .set_generation = set_generation_v2, + .data = ctx, + }; if (ctx->report_progress) - ctx->progress = start_delayed_progress( + info.progress = ctx->progress + = start_delayed_progress( _("Computing commit graph generation numbers"), ctx->commits.nr); @@ -1577,47 +1595,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) } } - for (i = 0; i < ctx->commits.nr; i++) { - struct commit *c = ctx->commits.list[i]; - timestamp_t corrected_commit_date; - - repo_parse_commit(ctx->r, c); - corrected_commit_date = commit_graph_data_at(c)->generation; - - display_progress(ctx->progress, i + 1); - if (corrected_commit_date != GENERATION_NUMBER_ZERO) - continue; - - commit_list_insert(c, &list); - while (list) { - struct commit *current = list->item; - struct commit_list *parent; - int all_parents_computed = 1; - timestamp_t max_corrected_commit_date = 0; - - for (parent = current->parents; parent; parent = parent->next) { - repo_parse_commit(ctx->r, parent->item); - corrected_commit_date = commit_graph_data_at(parent->item)->generation; - - if (corrected_commit_date == GENERATION_NUMBER_ZERO) { - all_parents_computed = 0; - commit_list_insert(parent->item, &list); - break; - } - - if (corrected_commit_date > max_corrected_commit_date) - max_corrected_commit_date = corrected_commit_date; - } - - if (all_parents_computed) { - pop_commit(&list); - - if (current->date && current->date > max_corrected_commit_date) - max_corrected_commit_date = current->date - 1; - commit_graph_data_at(current)->generation = max_corrected_commit_date + 1; - } - } - } + compute_reachable_generation_numbers(&info, 2); for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.list[i]; -- cgit v1.2.3 From 2ee11f7261cc7ac386ec683f774472b0309dcc82 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Mar 2023 11:26:51 +0000 Subject: commit-graph: return generation from memory The commit_graph_generation() method used to report a value of GENERATION_NUMBER_INFINITY if the commit_graph_data_slab had an instance for the given commit but the graph_pos indicated the commit was not in the commit-graph file. However, an upcoming change will introduce the ability to set generation values in-memory without writing the commit-graph file. Thus, we can no longer trust 'graph_pos' to indicate whether or not the generation member can be trusted. Instead, trust the 'generation' member if the commit has a value in the slab _and_ the 'generation' member is non-zero. Otherwise, treat it as GENERATION_NUMBER_INFINITY. This only makes a difference for a very old case for the commit-graph: the very first Git release to write commit-graph files wrote zeroes in the topological level positions. If we are parsing a commit-graph with all zeroes, those commits will now appear to have GENERATION_NUMBER_INFINITY (as if they were not parsed from the commit-graph). I attempted several variations to work around the need for providing an uninitialized 'generation' member, but this was the best one I found. It does require a change to a verification test in t5318 because it reports a different error than the one about non-zero generation numbers. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index d1c98681e8..63a56483cf 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -116,12 +116,10 @@ timestamp_t commit_graph_generation(const struct commit *c) struct commit_graph_data *data = commit_graph_data_slab_peek(&commit_graph_data_slab, c); - if (!data) - return GENERATION_NUMBER_INFINITY; - else if (data->graph_pos == COMMIT_NOT_FROM_GRAPH) - return GENERATION_NUMBER_INFINITY; + if (data && data->generation) + return data->generation; - return data->generation; + return GENERATION_NUMBER_INFINITY; } static struct commit_graph_data *commit_graph_data_at(const struct commit *c) -- cgit v1.2.3 From c08645b353514fe14dbd62cf52afd49d0e88146b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 20 Mar 2023 11:26:52 +0000 Subject: commit-graph: introduce `ensure_generations_valid()` Use the just-introduced compute_reachable_generation_numbers_1() to implement a function which dynamically computes topological levels (or corrected commit dates) for out-of-graph commits. This will be useful for the ahead-behind algorithm we are about to introduce, which needs accurate topological levels on _all_ commits reachable from the tips in order to avoid over-counting. Co-authored-by: Derrick Stolee Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 63a56483cf..172e679db1 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1604,6 +1604,35 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static void set_generation_in_graph_data(struct commit *c, timestamp_t t, + void *data) +{ + commit_graph_data_at(c)->generation = t; +} + +/* + * After this method, all commits reachable from those in the given + * list will have non-zero, non-infinite generation numbers. + */ +void ensure_generations_valid(struct repository *r, + struct commit **commits, size_t nr) +{ + int generation_version = get_configured_generation_version(r); + struct packed_commit_list list = { + .list = commits, + .alloc = nr, + .nr = nr, + }; + struct compute_generation_info info = { + .r = r, + .commits = &list, + .get_generation = get_generation_from_graph_data, + .set_generation = set_generation_in_graph_data, + }; + + compute_reachable_generation_numbers(&info, generation_version); +} + static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx) { trace2_data_intmax("commit-graph", ctx->r, "filter-computed", -- cgit v1.2.3