From e6c71f239d18af3b99af8fa2b68f16cee813d1e2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:36 -0400 Subject: midx.c: use `size_t`'s for fanout nr and alloc The `midx_fanout` struct is used to keep track of a set of OIDs corresponding to each layer of the MIDX's fanout table. It stores an array of entries, along with the number of entries in the table, and the allocated size of the array. Both `nr` and `alloc` are stored as 32-bit unsigned integers. In practice, this should never cause any problems, since most packs have far fewer than 2^32-1 objects. But storing these as `size_t`'s is more appropriate, and prevents us from accidentally overflowing some result when multiplying or adding to either of these values. Update these struct members to be `size_t`'s as appropriate. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index b500174d1f..0da2faac67 100644 --- a/midx.c +++ b/midx.c @@ -584,12 +584,14 @@ static void fill_pack_entry(uint32_t pack_int_id, struct midx_fanout { struct pack_midx_entry *entries; - uint32_t nr; - uint32_t alloc; + size_t nr, alloc; }; -static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr) +static void midx_fanout_grow(struct midx_fanout *fanout, size_t nr) { + if (nr < fanout->nr) + BUG("negative growth in midx_fanout_grow() (%"PRIuMAX" < %"PRIuMAX")", + (uintmax_t)nr, (uintmax_t)fanout->nr); ALLOC_GROW(fanout->entries, nr, fanout->alloc); } -- cgit v1.2.3 From c2b24ede229dbc6686e37c8cae1e169fc356049e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:38 -0400 Subject: midx.c: prevent overflow in `nth_midxed_object_oid()` In a similar spirit as previous commits, avoid overflow when looking up an object's OID in a MIDX when its position is greater than `2^32-1/m->hash_len`. As usual, it is perfectly OK for a MIDX to have as many as 2^32-1 objects (since we use 32-bit fields to count the number of objects at each fanout layer). But if we have more than `2^32-1/m->hash_len` number of objects, we will incorrectly perform the computation using 32-bit integers, overflowing the result. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 0da2faac67..c774cd69c7 100644 --- a/midx.c +++ b/midx.c @@ -254,7 +254,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, if (n >= m->num_objects) return NULL; - oidread(oid, m->chunk_oid_lookup + m->hash_len * n); + oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n)); return oid; } -- cgit v1.2.3 From 5675150cc3bfc03c5721edcfc49fbe43b15b5209 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:41 -0400 Subject: midx.c: prevent overflow in `nth_midxed_offset()` In a similar spirit as previous patches, avoid an overflow when looking up object offsets in the MIDX's large offset table by guarding the computation via `st_mult()`. This instance is also OK as-is, since the left operand is the result of `sizeof(...)`, which is already a `size_t`. But use `st_mult()` instead here to make it explicit that this computation is to be performed using 64-bit unsigned integers. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index c774cd69c7..cf7d06d78b 100644 --- a/midx.c +++ b/midx.c @@ -271,7 +271,8 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; - return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); + return get_be64(m->chunk_large_offsets + + st_mult(sizeof(uint64_t), offset32)); } return offset32; -- cgit v1.2.3 From cc38127439be50ade60fb2db18c7f5f0cc170a36 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:44 -0400 Subject: midx.c: store `nr`, `alloc` variables as `size_t`'s In the `write_midx_context` structure, we use two `uint32_t`'s to track the length and allocated size of the packs, and one `uint32_t` to track the number of objects in the MIDX. In practice, having these be 32-bit unsigned values shouldn't cause any problems since we are unlikely to have that many objects or packs in any real-world repository. But these values should be `size_t`'s, so change their type to reflect that. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index cf7d06d78b..909639eea5 100644 --- a/midx.c +++ b/midx.c @@ -446,14 +446,14 @@ static int idx_or_pack_name_cmp(const void *_va, const void *_vb) struct write_midx_context { struct pack_info *info; - uint32_t nr; - uint32_t alloc; + size_t nr; + size_t alloc; struct multi_pack_index *m; struct progress *progress; unsigned pack_paths_checked; struct pack_midx_entry *entries; - uint32_t entries_nr; + size_t entries_nr; uint32_t *pack_perm; uint32_t *pack_order; @@ -671,17 +671,18 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, struct pack_info *info, uint32_t nr_packs, - uint32_t *nr_objects, + size_t *nr_objects, int preferred_pack) { uint32_t cur_fanout, cur_pack, cur_object; - uint32_t alloc_objects, total_objects = 0; + size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; struct pack_midx_entry *deduplicated_entries = NULL; uint32_t start_pack = m ? m->num_packs : 0; for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) - total_objects += info[cur_pack].p->num_objects; + total_objects = st_add(total_objects, + info[cur_pack].p->num_objects); /* * As we de-duplicate by fanout value, we expect the fanout @@ -724,7 +725,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, &fanout.entries[cur_object].oid)) continue; - ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); + ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1), + alloc_objects); memcpy(&deduplicated_entries[*nr_objects], &fanout.entries[cur_object], sizeof(struct pack_midx_entry)); -- cgit v1.2.3 From 2bc764c1d4d1bc12ba7d95ceebb0da54ba381be5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:46 -0400 Subject: midx.c: prevent overflow in `write_midx_internal()` When writing a MIDX, we use the chunk-format API to write out each individual chunk of the MIDX. Each chunk of the MIDX is tracked via a call to `add_chunk()`, along with the expected size of that chunk. Guard against overflow when dealing with a MIDX with a large number of entries (and consequently, large chunks within the MIDX file itself) to avoid corrupting the contents of the MIDX itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 909639eea5..606e3ae79e 100644 --- a/midx.c +++ b/midx.c @@ -1501,21 +1501,22 @@ static int write_midx_internal(const char *object_dir, add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE, write_midx_oid_fanout); add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, - (size_t)ctx.entries_nr * the_hash_algo->rawsz, + st_mult(ctx.entries_nr, the_hash_algo->rawsz), write_midx_oid_lookup); add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, - (size_t)ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH, + st_mult(ctx.entries_nr, MIDX_CHUNK_OFFSET_WIDTH), write_midx_object_offsets); if (ctx.large_offsets_needed) add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, - (size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH, + st_mult(ctx.num_large_offsets, + MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, - ctx.entries_nr * sizeof(uint32_t), + st_mult(ctx.entries_nr, sizeof(uint32_t)), write_midx_revindex); } -- cgit v1.2.3 From d67609bdded151d78522911629c9e7da7fd06640 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:49 -0400 Subject: midx.c: prevent overflow in `fill_included_packs_batch()` In a similar spirit as in previous commits, avoid an integer overflow when computing the expected size of a MIDX. (Note that this is also OK as-is, since `p->pack_size` is an `off_t`, so this computation should already be done as 64-bit integers. But again, let's use `st_mult()` to make this fact clear). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 606e3ae79e..067482a98c 100644 --- a/midx.c +++ b/midx.c @@ -1994,8 +1994,8 @@ static int fill_included_packs_batch(struct repository *r, if (open_pack_index(p) || !p->num_objects) continue; - expected_size = (size_t)(p->pack_size - * pack_info[i].referenced_objects); + expected_size = st_mult(p->pack_size, + pack_info[i].referenced_objects); expected_size /= p->num_objects; if (expected_size >= batch_size) -- cgit v1.2.3