Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-01-24 00:52:26 +0300
committerJunio C Hamano <gitster@pobox.com>2024-01-24 00:52:26 +0300
commiteed8e4df1f603dc40afb6ebcf8421d2cb1426861 (patch)
treeb9ac478613c45ac1445f43cb2a475999f38c790c /commit-graph.c
parentfbb832ce9a095a0146f6fb1a160a16614b118dc6 (diff)
parentcb88efa184c4c482af7eaac978289dd0fee513d4 (diff)
Merge branch 'tb/path-filter-fix' into seen
The Bloom filter used for path limited history traversal was broken on systems whose "char" is unsigned; update the implementation and bump the format version to 2. * tb/path-filter-fix: bloom: introduce `deinit_bloom_filters()` commit-graph: reuse existing Bloom filters where possible object.h: fix mis-aligned flag bits table commit-graph: drop unnecessary `graph_read_bloom_data_context` commit-graph.c: unconditionally load Bloom filters bloom: prepare to discard incompatible Bloom filters bloom: annotate filters with hash version commit-graph: new Bloom filter version that fixes murmur3 repo-settings: introduce commitgraph.changedPathsVersion t4216: test changed path filters with high bit paths t/helper/test-read-graph: implement `bloom-filters` mode bloom.h: make `load_bloom_filter_from_graph()` public t/helper/test-read-graph.c: extract `dump_graph_info()` gitformat-commit-graph: describe version 2 of BDAT commit-graph: ensure Bloom filters are read with consistent settings revision.c: consult Bloom filters for root commits t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
Diffstat (limited to 'commit-graph.c')
-rw-r--r--commit-graph.c68
1 files changed, 58 insertions, 10 deletions
diff --git a/commit-graph.c b/commit-graph.c
index f0fc7c6fd1..32583991f8 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -312,13 +312,10 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
return -1;
}
- g->chunk_bloom_data = chunk_start;
- g->chunk_bloom_data_size = chunk_size;
hash_version = get_be32(chunk_start);
- if (hash_version != 1)
- return 0;
-
+ g->chunk_bloom_data = chunk_start;
+ g->chunk_bloom_data_size = chunk_size;
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
g->bloom_filter_settings->hash_version = hash_version;
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
@@ -423,10 +420,10 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
graph->read_generation_data = 1;
}
- if (s->commit_graph_read_changed_paths) {
+ if (s->commit_graph_changed_paths_version) {
int res = pair_chunk_expect(cf, GRAPH_CHUNKID_BLOOMINDEXES,
- &graph->chunk_bloom_indexes,
- sizeof(uint32_t), graph->num_commits);
+ &graph->chunk_bloom_indexes,
+ sizeof(uint32_t), graph->num_commits);
if (res && res != CHUNK_NOT_FOUND)
warning(_("commit-graph changed-path index chunk is too small"));
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
@@ -510,6 +507,31 @@ static int validate_mixed_generation_chain(struct commit_graph *g)
return 0;
}
+static void validate_mixed_bloom_settings(struct commit_graph *g)
+{
+ struct bloom_filter_settings *settings = NULL;
+ for (; g; g = g->base_graph) {
+ if (!g->bloom_filter_settings)
+ continue;
+ if (!settings) {
+ settings = g->bloom_filter_settings;
+ continue;
+ }
+
+ if (g->bloom_filter_settings->bits_per_entry != settings->bits_per_entry ||
+ g->bloom_filter_settings->num_hashes != settings->num_hashes ||
+ g->bloom_filter_settings->hash_version != settings->hash_version) {
+ g->chunk_bloom_indexes = NULL;
+ g->chunk_bloom_data = NULL;
+ FREE_AND_NULL(g->bloom_filter_settings);
+
+ warning(_("disabling Bloom filters for commit-graph "
+ "layer '%s' due to incompatible settings"),
+ oid_to_hex(&g->oid));
+ }
+ }
+}
+
static int add_graph_to_chain(struct commit_graph *g,
struct commit_graph *chain,
struct object_id *oids,
@@ -633,6 +655,7 @@ struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
}
validate_mixed_generation_chain(graph_chain);
+ validate_mixed_bloom_settings(graph_chain);
free(oids);
fclose(fp);
@@ -777,6 +800,7 @@ void close_commit_graph(struct raw_object_store *o)
return;
clear_commit_graph_data_slab(&commit_graph_data_slab);
+ deinit_bloom_filters();
free_commit_graph(o->commit_graph);
o->commit_graph = NULL;
}
@@ -1114,6 +1138,7 @@ struct write_commit_graph_context {
int count_bloom_filter_not_computed;
int count_bloom_filter_trunc_empty;
int count_bloom_filter_trunc_large;
+ int count_bloom_filter_upgraded;
};
static int write_graph_chunk_fanout(struct hashfile *f,
@@ -1721,6 +1746,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
ctx->count_bloom_filter_trunc_empty);
trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
ctx->count_bloom_filter_trunc_large);
+ trace2_data_intmax("commit-graph", ctx->r, "filter-upgraded",
+ ctx->count_bloom_filter_upgraded);
}
static void compute_bloom_filters(struct write_commit_graph_context *ctx)
@@ -1762,6 +1789,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->count_bloom_filter_trunc_empty++;
if (computed & BLOOM_TRUNC_LARGE)
ctx->count_bloom_filter_trunc_large++;
+ } else if (computed & BLOOM_UPGRADED) {
+ ctx->count_bloom_filter_upgraded++;
} else if (computed & BLOOM_NOT_COMPUTED)
ctx->count_bloom_filter_not_computed++;
ctx->total_bloom_filter_data_size += filter
@@ -2445,6 +2474,13 @@ int write_commit_graph(struct object_directory *odb,
}
if (!commit_graph_compatible(r))
return 0;
+ if (r->settings.commit_graph_changed_paths_version < -1
+ || r->settings.commit_graph_changed_paths_version > 2) {
+ warning(_("attempting to write a commit-graph, but "
+ "'commitgraph.changedPathsVersion' (%d) is not supported"),
+ r->settings.commit_graph_changed_paths_version);
+ return 0;
+ }
CALLOC_ARRAY(ctx, 1);
ctx->r = r;
@@ -2457,6 +2493,7 @@ int write_commit_graph(struct object_directory *odb,
ctx->write_generation_data = (get_configured_generation_version(r) == 2);
ctx->num_generation_data_overflows = 0;
+ bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version;
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry);
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
@@ -2486,12 +2523,20 @@ int write_commit_graph(struct object_directory *odb,
g = ctx->r->objects->commit_graph;
/* We have changed-paths already. Keep them in the next graph */
- if (g && g->chunk_bloom_data) {
+ if (g && g->bloom_filter_settings) {
ctx->changed_paths = 1;
- ctx->bloom_settings = g->bloom_filter_settings;
+
+ /* don't propagate the hash_version unless unspecified */
+ if (bloom_settings.hash_version == -1)
+ bloom_settings.hash_version = g->bloom_filter_settings->hash_version;
+ bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry;
+ bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes;
+ bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths;
}
}
+ bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1;
+
if (ctx->split) {
struct commit_graph *g = ctx->r->objects->commit_graph;
@@ -2574,6 +2619,9 @@ int write_commit_graph(struct object_directory *odb,
res = write_commit_graph_file(ctx);
+ if (ctx->changed_paths)
+ deinit_bloom_filters();
+
if (ctx->split)
mark_commit_graphs(ctx);