From 1217c03e7b87b15f2c78af5b1e1915a675050454 Mon Sep 17 00:00:00 2001 From: Garima Singh Date: Mon, 6 Apr 2020 16:59:50 +0000 Subject: commit-graph: reuse existing Bloom filters during write Add logic to a) parse Bloom filter information from the commit graph file and, b) re-use existing Bloom filters. See Documentation/technical/commit-graph-format for the format in which the Bloom filter information is written to the commit graph file. To read Bloom filter for a given commit with lexicographic position 'i' we need to: 1. Read BIDX[i] which essentially gives us the starting index in BDAT for filter of commit i+1. It is essentially the index past the end of the filter of commit i. It is called end_index in the code. 2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT for filter of commit i. It is called the start_index in the code. For the first commit, where i = 0, Bloom filter data starts at the beginning, just past the header in the BDAT chunk. Hence, start_index will be 0. 3. The length of the filter will be end_index - start_index, because BIDX[i] gives the cumulative 8-byte words including the ith commit's filter. We toggle whether Bloom filters should be recomputed based on the compute_if_not_present flag. Helped-by: Derrick Stolee Signed-off-by: Garima Singh Signed-off-by: Junio C Hamano --- bloom.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'bloom.c') diff --git a/bloom.c b/bloom.c index a16eee9233..0f714dd76a 100644 --- a/bloom.c +++ b/bloom.c @@ -4,6 +4,8 @@ #include "diffcore.h" #include "revision.h" #include "hashmap.h" +#include "commit-graph.h" +#include "commit.h" define_commit_slab(bloom_filter_slab, struct bloom_filter); @@ -26,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int load_bloom_filter_from_graph(struct commit_graph *g, + struct bloom_filter *filter, + struct commit *c) +{ + uint32_t lex_pos, start_index, end_index; + + while (c->graph_pos < g->num_commits_in_base) + g = g->base_graph; + + /* The commit graph commit 'c' lives in doesn't carry bloom filters. */ + if (!g->chunk_bloom_indexes) + return 0; + + lex_pos = c->graph_pos - g->num_commits_in_base; + + end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos); + + if (lex_pos > 0) + start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1)); + else + start_index = 0; + + filter->len = end_index - start_index; + filter->data = (unsigned char *)(g->chunk_bloom_data + + sizeof(unsigned char) * start_index + + BLOOMDATA_CHUNK_HEADER_SIZE); + + return 1; +} + /* * Calculate the murmur3 32-bit hash value for the given data * using the given seed. @@ -127,7 +159,8 @@ void init_bloom_filters(void) } struct bloom_filter *get_bloom_filter(struct repository *r, - struct commit *c) + struct commit *c, + int compute_if_not_present) { struct bloom_filter *filter; struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; @@ -140,6 +173,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r, filter = bloom_filter_slab_at(&bloom_filters, c); + if (!filter->data) { + load_commit_graph_info(r, c); + if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && + r->objects->commit_graph->chunk_bloom_indexes) { + if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) + return filter; + else + return NULL; + } + } + + if (filter->data || !compute_if_not_present) + return filter; + repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; diffopt.max_changes = max_changes; -- cgit v1.2.3