From 66f0c71073ee5fe1c9d12d2952305a4793d7b43f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:23:39 -0500 Subject: pack-objects: free packing_data in more places The pack-objects internals use a packing_data struct to track what objects are part of the pack(s) being formed. Since these structures contain allocated fields, failing to appropriately free() them results in a leak. Plug that leak by introducing a clear_packing_data() function, and call it in the appropriate spots. This is a fairly straightforward leak to plug, since none of the callers expect to read any values or have any references to parts of the address space being freed. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 1 + 1 file changed, 1 insertion(+) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 89a8b5a976..321d7effb0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4522,6 +4522,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) reuse_packfile_objects); cleanup: + clear_packing_data(&to_pack); list_objects_filter_release(&filter_options); strvec_clear(&rp); -- cgit v1.2.3 From 35e156b9de1dcc43673c6050cdb65735a7457c1a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:01 -0500 Subject: pack-bitmap: simplify `reuse_partial_packfile_from_bitmap()` signature The signature of `reuse_partial_packfile_from_bitmap()` currently takes in a bitmap, as well as three output parameters (filled through pointers, and passed as arguments), and also returns an integer result. The output parameters are filled out with: (a) the packfile used for pack-reuse, (b) the number of objects from that pack that we can reuse, and (c) a bitmap indicating which objects we can reuse. The return value is either -1 (when there are no objects to reuse), or 0 (when there is at least one object to reuse). Some of these parameters are redundant. 
Notably, we can infer from the bitmap how many objects are reused by calling bitmap_popcount(). And we can similarly compute the return value based on that number as well. As such, clean up the signature of this function to drop the "*entries" parameter, as well as the int return value, since the single caller of this function can infer these values itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 321d7effb0..c3df6d9657 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3943,13 +3943,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; - if (pack_options_allow_reuse() && - !reuse_partial_packfile_from_bitmap( - bitmap_git, - &reuse_packfile, - &reuse_packfile_objects, - &reuse_packfile_bitmap)) { - assert(reuse_packfile_objects); + if (pack_options_allow_reuse()) + reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfile, + &reuse_packfile_bitmap); + + if (reuse_packfile) { + reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); + if (!reuse_packfile_objects) + BUG("expected non-empty reuse bitmap"); + nr_result += reuse_packfile_objects; nr_seen += reuse_packfile_objects; display_progress(progress_state, nr_seen); -- cgit v1.2.3 From 83296d20e84e248ea539fe1332fca2139cfcfb8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:04 -0500 Subject: pack-bitmap: return multiple packs via `reuse_partial_packfile_from_bitmap()` Further prepare for enabling verbatim pack-reuse over multiple packfiles by changing the signature of reuse_partial_packfile_from_bitmap() to populate an array of `struct bitmapped_pack *`'s instead of a pointer to a single packfile. 
Since the array we're filling out is sized dynamically[^1], add an additional `size_t *` parameter which will hold the number of reusable packs (equal to the number of elements in the array). Note that since we still have not implemented true multi-pack reuse, these changes aren't propagated out to the rest of the caller in builtin/pack-objects.c. In the interim state, we expect that the array has a single element, and we use that element to fill out the static `reuse_packfile` variable (which is a bog-standard `struct packed_git *`). Future commits will continue to push this change further out through the pack-objects code. [^1]: That is, even though we know the number of packs which are candidates for pack-reuse, we do not know how many of those candidates we can actually reuse. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c3df6d9657..87e16636a8 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3940,14 +3940,19 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { + struct bitmapped_pack *packs = NULL; + size_t packs_nr = 0; + if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; if (pack_options_allow_reuse()) - reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfile, + reuse_partial_packfile_from_bitmap(bitmap_git, &packs, + &packs_nr, &reuse_packfile_bitmap); - if (reuse_packfile) { + if (packs) { + reuse_packfile = packs[0].p; reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); if (!reuse_packfile_objects) BUG("expected non-empty reuse bitmap"); -- cgit v1.2.3 From 5e29c3f70749f9f2102185e7450c54a3637fdee0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:07 -0500 Subject: pack-objects: parameterize pack-reuse routines over a single pack The routines 
pack-objects uses to perform verbatim pack-reuse are: - write_reused_pack_one() - write_reused_pack_verbatim() - write_reused_pack() , all of which assume that there is exactly one packfile being reused: the global constant `reuse_packfile`. Prepare for reusing objects from multiple packs by making reuse packfile a parameter of each of the above functions in preparation for calling these functions in a loop with multiple packfiles. Note that we still have the global "reuse_packfile", but pass it through each of the above function's parameter lists, eliminating all but one direct access (the top-level caller in `write_pack_file()`). Even after this series, we will still have a global, but it will hold the array of reusable packfiles, and we'll pass them one at a time to these functions in a loop. Note also that we will eventually need to pass a `bitmapped_pack` instead of a `packed_git` in order to hold onto additional information required for reuse (such as the bit position of the first object belonging to that pack). But that change will be made in a future commit so as to minimize the noise below as much as possible. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 87e16636a8..102fe9a4f8 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1013,7 +1013,8 @@ static off_t find_reused_offset(off_t where) return reused_chunks[lo-1].difference; } -static void write_reused_pack_one(size_t pos, struct hashfile *out, +static void write_reused_pack_one(struct packed_git *reuse_packfile, + size_t pos, struct hashfile *out, struct pack_window **w_curs) { off_t offset, next, cur; @@ -1091,7 +1092,8 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); } -static size_t write_reused_pack_verbatim(struct hashfile *out, +static size_t write_reused_pack_verbatim(struct packed_git *reuse_packfile, + struct hashfile *out, struct pack_window **w_curs) { size_t pos = 0; @@ -1118,14 +1120,15 @@ static size_t write_reused_pack_verbatim(struct hashfile *out, return pos; } -static void write_reused_pack(struct hashfile *f) +static void write_reused_pack(struct packed_git *reuse_packfile, + struct hashfile *f) { size_t i = 0; uint32_t offset; struct pack_window *w_curs = NULL; if (allow_ofs_delta) - i = write_reused_pack_verbatim(f, &w_curs); + i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs); for (; i < reuse_packfile_bitmap->word_alloc; ++i) { eword_t word = reuse_packfile_bitmap->words[i]; @@ -1141,7 +1144,8 @@ static void write_reused_pack(struct hashfile *f) * bitmaps. See comment in try_partial_reuse() * for why. 
*/ - write_reused_pack_one(pos + offset, f, &w_curs); + write_reused_pack_one(reuse_packfile, pos + offset, f, + &w_curs); display_progress(progress_state, ++written); } } @@ -1199,7 +1203,7 @@ static void write_pack_file(void) if (reuse_packfile) { assert(pack_to_stdout); - write_reused_pack(f); + write_reused_pack(reuse_packfile, f); offset = hashfile_total(f); } -- cgit v1.2.3 From d1d701eb9ce2293588aabf34c69335d49640f968 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:09 -0500 Subject: pack-objects: keep track of `pack_start` for each reuse pack When reusing objects from a pack, we keep track of a set of one or more `reused_chunk`s, corresponding to sections of one or more object(s) from a source pack that we are reusing. Each chunk contains two pieces of information: - the offset of the first object in the source pack (relative to the beginning of the source pack) - the difference between that offset, and the corresponding offset in the pack we're generating The purpose of keeping track of these is so that we can patch any OFS_DELTAs that cross over a section of the reuse pack that we didn't take. For instance, consider a hypothetical pack as shown below:

                                                (chunk #2)
                                                __________...
                                               /
                                              /
      +--------+---------+-------------------+---------+
  ... | <base> | <other> |     (unused)      | <delta> | ...
      +--------+---------+-------------------+---------+
       \                /
        \______________/
           (chunk #1)

Suppose that we are sending objects "base", "other", and "delta", and that the "delta" object is stored as an OFS_DELTA, and that its base is "base". If we don't send any objects in the "(unused)" range, we can't copy the delta'd object directly, since its delta offset includes a range of the pack that we didn't copy, so we have to account for that difference when patching and reassembling the delta. 
In order to compute this value correctly, we need to know not only where we are in the packfile we're assembling (with `hashfile_total(f)`) but also the position of the first byte of the packfile that we are currently reusing. Currently, this works just fine, since when reusing only a single pack those two values are always identical (because verbatim reuse is the first thing pack-objects does when enabled after writing the pack header). But when reusing multiple packs which have one or more gaps, we'll need to account for these two values diverging. Together, these two allow us to compute the reused chunk's offset difference relative to the start of the reused pack, as desired. Helped-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 102fe9a4f8..f51b86d99f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1015,6 +1015,7 @@ static off_t find_reused_offset(off_t where) static void write_reused_pack_one(struct packed_git *reuse_packfile, size_t pos, struct hashfile *out, + off_t pack_start, struct pack_window **w_curs) { off_t offset, next, cur; @@ -1024,7 +1025,8 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile, offset = pack_pos_to_offset(reuse_packfile, pos); next = pack_pos_to_offset(reuse_packfile, pos + 1); - record_reused_object(offset, offset - hashfile_total(out)); + record_reused_object(offset, + offset - (hashfile_total(out) - pack_start)); cur = offset; type = unpack_object_header(reuse_packfile, w_curs, &cur, &size); @@ -1094,6 +1096,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile, static size_t write_reused_pack_verbatim(struct packed_git *reuse_packfile, struct hashfile *out, + off_t pack_start UNUSED, struct pack_window **w_curs) { size_t pos = 0; @@ -1125,10 +1128,12 @@ static void 
write_reused_pack(struct packed_git *reuse_packfile, { size_t i = 0; uint32_t offset; + off_t pack_start = hashfile_total(f) - sizeof(struct pack_header); struct pack_window *w_curs = NULL; if (allow_ofs_delta) - i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs); + i = write_reused_pack_verbatim(reuse_packfile, f, pack_start, + &w_curs); for (; i < reuse_packfile_bitmap->word_alloc; ++i) { eword_t word = reuse_packfile_bitmap->words[i]; @@ -1145,7 +1150,7 @@ static void write_reused_pack(struct packed_git *reuse_packfile, * for why. */ write_reused_pack_one(reuse_packfile, pos + offset, f, - &w_curs); + pack_start, &w_curs); display_progress(progress_state, ++written); } } -- cgit v1.2.3 From 073b40eba0064ad3bd9bfad89a050208bc033890 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:12 -0500 Subject: pack-objects: pass `bitmapped_pack`'s to pack-reuse functions Further prepare pack-objects to perform verbatim pack-reuse over multiple packfiles by converting functions that take in a pointer to a `struct packed_git` to instead take in a pointer to a `struct bitmapped_pack`. The additional information found in the bitmapped_pack struct (such as the bit position corresponding to the beginning of the pack) will be necessary in order to perform verbatim pack-reuse. Note that we don't use any of the extra pieces of information contained in the bitmapped_pack struct, so this step is merely preparatory and does not introduce any functional changes. Note further that we do not change the argument type to write_reused_pack_one(). That function is responsible for copying sections of the packfile directly and optionally patching any OFS_DELTAs to account for not reusing sections of the packfile in between a delta and its base. As such, that function is (and should remain) oblivious to multi-pack reuse, and does not require any of the extra pieces of information stored in the bitmapped_pack struct. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f51b86d99f..07c849b5d4 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -221,7 +221,8 @@ static int thin; static int num_preferred_base; static struct progress *progress_state; -static struct packed_git *reuse_packfile; +static struct bitmapped_pack *reuse_packfiles; +static size_t reuse_packfiles_nr; static uint32_t reuse_packfile_objects; static struct bitmap *reuse_packfile_bitmap; @@ -1094,7 +1095,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile, copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); } -static size_t write_reused_pack_verbatim(struct packed_git *reuse_packfile, +static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, struct hashfile *out, off_t pack_start UNUSED, struct pack_window **w_curs) @@ -1109,13 +1110,13 @@ static size_t write_reused_pack_verbatim(struct packed_git *reuse_packfile, off_t to_write; written = (pos * BITS_IN_EWORD); - to_write = pack_pos_to_offset(reuse_packfile, written) + to_write = pack_pos_to_offset(reuse_packfile->p, written) - sizeof(struct pack_header); /* We're recording one chunk, not one object. 
*/ record_reused_object(sizeof(struct pack_header), 0); hashflush(out); - copy_pack_data(out, reuse_packfile, w_curs, + copy_pack_data(out, reuse_packfile->p, w_curs, sizeof(struct pack_header), to_write); display_progress(progress_state, written); @@ -1123,7 +1124,7 @@ static size_t write_reused_pack_verbatim(struct packed_git *reuse_packfile, return pos; } -static void write_reused_pack(struct packed_git *reuse_packfile, +static void write_reused_pack(struct bitmapped_pack *reuse_packfile, struct hashfile *f) { size_t i = 0; @@ -1149,8 +1150,8 @@ static void write_reused_pack(struct packed_git *reuse_packfile, * bitmaps. See comment in try_partial_reuse() * for why. */ - write_reused_pack_one(reuse_packfile, pos + offset, f, - pack_start, &w_curs); + write_reused_pack_one(reuse_packfile->p, pos + offset, + f, pack_start, &w_curs); display_progress(progress_state, ++written); } } @@ -1206,9 +1207,12 @@ static void write_pack_file(void) offset = write_pack_header(f, nr_remaining); - if (reuse_packfile) { + if (reuse_packfiles_nr) { assert(pack_to_stdout); - write_reused_pack(reuse_packfile, f); + for (j = 0; j < reuse_packfiles_nr; j++) { + reused_chunks_nr = 0; + write_reused_pack(&reuse_packfiles[j], f); + } offset = hashfile_total(f); } @@ -3949,19 +3953,16 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - struct bitmapped_pack *packs = NULL; - size_t packs_nr = 0; - if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; if (pack_options_allow_reuse()) - reuse_partial_packfile_from_bitmap(bitmap_git, &packs, - &packs_nr, + reuse_partial_packfile_from_bitmap(bitmap_git, + &reuse_packfiles, + &reuse_packfiles_nr, &reuse_packfile_bitmap); - if (packs) { - reuse_packfile = packs[0].p; + if (reuse_packfiles) { reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); if (!reuse_packfile_objects) BUG("expected non-empty reuse bitmap"); -- cgit v1.2.3 From 48051257101bc4689f91b4ea2eec9606d71686dc 
Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:14 -0500 Subject: pack-objects: prepare `write_reused_pack()` for multi-pack reuse The function `write_reused_pack()` within `builtin/pack-objects.c` is responsible for performing pack-reuse on a single pack, and has two main functions: - it dispatches a call to `write_reused_pack_verbatim()` to see if we can reuse portions of the packfile in whole-word chunks - for any remaining objects (that is, any objects that appear after the first "gap" in the bitmap), call write_reused_pack_one() on that object to record it for reuse. Prepare this function for multi-pack reuse by removing the assumption that the bit position corresponding to the first object being reused from a given pack must be at bit position zero. The changes in this function are mostly straightforward. Initialize `i` to the position of the first word to contain bits corresponding to that reuse pack. In most situations, we throw the initialized value away, since we end up replacing it with the return value from write_reused_pack_verbatim(), moving us past the section of whole words that we reused. Likewise, modify the per-object loop to ignore any bits at the beginning of the first word that do not belong to the pack currently being reused, as well as skip to the "done" section once we have processed the last bit corresponding to this pack. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 07c849b5d4..6ce52d88a9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1127,7 +1127,7 @@ static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, static void write_reused_pack(struct bitmapped_pack *reuse_packfile, struct hashfile *f) { - size_t i = 0; + size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD; uint32_t offset; off_t pack_start = hashfile_total(f) - sizeof(struct pack_header); struct pack_window *w_curs = NULL; @@ -1145,17 +1145,23 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile, break; offset += ewah_bit_ctz64(word >> offset); + if (pos + offset < reuse_packfile->bitmap_pos) + continue; + if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr) + goto done; /* * Can use bit positions directly, even for MIDX * bitmaps. See comment in try_partial_reuse() * for why. */ - write_reused_pack_one(reuse_packfile->p, pos + offset, + write_reused_pack_one(reuse_packfile->p, + pos + offset - reuse_packfile->bitmap_pos, f, pack_start, &w_curs); display_progress(progress_state, ++written); } } +done: unuse_pack(&w_curs); } -- cgit v1.2.3 From ca0fd69e37132acdddc457b96a91ef528c7c312b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:17 -0500 Subject: pack-objects: prepare `write_reused_pack_verbatim()` for multi-pack reuse The function `write_reused_pack_verbatim()` within `builtin/pack-objects.c` is responsible for writing out a continuous set of objects beginning at the start of the reuse packfile. 
In the existing implementation, we did something like: while (pos < reuse_packfile_bitmap->word_alloc && reuse_packfile_bitmap->words[pos] == (eword_t)~0) pos++; if (pos) /* write first `pos * BITS_IN_EWORD` objects from pack */ as an optimization to record a single chunk for the longest continuous prefix of objects wanted out of the reuse pack, instead of having a chunk for each individual object. For more details, see bb514de356 (pack-objects: improve partial packfile reuse, 2019-12-18). In order to retain this optimization in a multi-pack reuse world, we can no longer assume that the first object in a pack is on a word boundary in the bitmap storing the set of reusable objects. Assuming that all objects from the beginning of the reuse packfile up to the object corresponding to the first bit on a word boundary are part of the result, consume whole words at a time until the last whole word belonging to the reuse packfile. Copy those objects to the resulting packfile, and track that we reused them by recording a single chunk. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 73 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 13 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6ce52d88a9..31053128fc 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1097,31 +1097,78 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile, static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, struct hashfile *out, - off_t pack_start UNUSED, + off_t pack_start, struct pack_window **w_curs) { - size_t pos = 0; + size_t pos = reuse_packfile->bitmap_pos; + size_t end; - while (pos < reuse_packfile_bitmap->word_alloc && - reuse_packfile_bitmap->words[pos] == (eword_t)~0) - pos++; + if (pos % BITS_IN_EWORD) { + size_t word_pos = (pos / BITS_IN_EWORD); + size_t offset = pos % BITS_IN_EWORD; + size_t last; + eword_t word = reuse_packfile_bitmap->words[word_pos]; - if (pos) { - off_t to_write; + if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD) + last = offset + reuse_packfile->bitmap_nr; + else + last = BITS_IN_EWORD; + + for (; offset < last; offset++) { + if (word >> offset == 0) + return word_pos; + if (!bitmap_get(reuse_packfile_bitmap, + word_pos * BITS_IN_EWORD + offset)) + return word_pos; + } + + pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD); + } + + /* + * Now we're going to copy as many whole eword_t's as possible. + * "end" is the index of the last whole eword_t we copy, but + * there may be additional bits to process. Those are handled + * individually by write_reused_pack(). + * + * Begin by advancing to the first word boundary in range of the + * bit positions occupied by objects in "reuse_packfile". Then + * pick the last word boundary in the same range. If we have at + * least one word's worth of bits to process, continue on. 
+ */ + end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr; + if (end % BITS_IN_EWORD) + end -= end % BITS_IN_EWORD; + if (pos >= end) + return reuse_packfile->bitmap_pos / BITS_IN_EWORD; + + while (pos < end && + reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0) + pos += BITS_IN_EWORD; + + if (pos > end) + pos = end; + + if (reuse_packfile->bitmap_pos < pos) { + off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0); + off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p, + pos - reuse_packfile->bitmap_pos); - written = (pos * BITS_IN_EWORD); - to_write = pack_pos_to_offset(reuse_packfile->p, written) - - sizeof(struct pack_header); + written += pos - reuse_packfile->bitmap_pos; /* We're recording one chunk, not one object. */ - record_reused_object(sizeof(struct pack_header), 0); + record_reused_object(pack_start_off, + pack_start_off - (hashfile_total(out) - pack_start)); hashflush(out); copy_pack_data(out, reuse_packfile->p, w_curs, - sizeof(struct pack_header), to_write); + pack_start_off, pack_end_off - pack_start_off); display_progress(progress_state, written); } - return pos; + if (pos % BITS_IN_EWORD) + BUG("attempted to jump past a word boundary to %"PRIuMAX, + (uintmax_t)pos); + return pos / BITS_IN_EWORD; } static void write_reused_pack(struct bitmapped_pack *reuse_packfile, -- cgit v1.2.3 From b96289a10b6577c8fb60b425dbd009f6966622f8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:20 -0500 Subject: pack-objects: include number of packs reused in output In addition to including the number of objects reused verbatim from a reuse-pack, include the number of packs from which objects were reused. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 31053128fc..7eb035eb7d 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -223,6 +223,7 @@ static struct progress *progress_state; static struct bitmapped_pack *reuse_packfiles; static size_t reuse_packfiles_nr; +static size_t reuse_packfiles_used_nr; static uint32_t reuse_packfile_objects; static struct bitmap *reuse_packfile_bitmap; @@ -1265,6 +1266,8 @@ static void write_pack_file(void) for (j = 0; j < reuse_packfiles_nr; j++) { reused_chunks_nr = 0; write_reused_pack(&reuse_packfiles[j], f); + if (reused_chunks_nr) + reuse_packfiles_used_nr++; } offset = hashfile_total(f); } @@ -4587,9 +4590,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) fprintf_ln(stderr, _("Total %"PRIu32" (delta %"PRIu32")," " reused %"PRIu32" (delta %"PRIu32")," - " pack-reused %"PRIu32), + " pack-reused %"PRIu32" (from %"PRIuMAX")"), written, written_delta, reused, reused_delta, - reuse_packfile_objects); + reuse_packfile_objects, + (uintmax_t)reuse_packfiles_used_nr); cleanup: clear_packing_data(&to_pack); -- cgit v1.2.3 From 54393e4e68f2b4cf1ebaab8a934e83c9384595af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:36 -0500 Subject: pack-objects: add tracing for various packfile metrics As part of the multi-pack reuse effort, we will want to add some tests that assert that we reused a certain number of objects from a certain number of packs. We could do this by grepping through the stderr output of `pack-objects`, but doing so would be brittle in case the output format changed. Instead, let's use the trace2 mechanism to log various pieces of information about the generated packfile, which we can then use to compare against desired values. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7eb035eb7d..7aae9f104b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4595,6 +4595,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) reuse_packfile_objects, (uintmax_t)reuse_packfiles_used_nr); + trace2_data_intmax("pack-objects", the_repository, "written", written); + trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta); + trace2_data_intmax("pack-objects", the_repository, "reused", reused); + trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta); + trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects); + trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr); + cleanup: clear_packing_data(&to_pack); list_objects_filter_release(&filter_options); -- cgit v1.2.3 From 941074134cefe49fd7dc894665f1eb9804e06cf8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:42 -0500 Subject: pack-objects: allow setting `pack.allowPackReuse` to "single" In e704fc7978 (pack-objects: introduce pack.allowPackReuse, 2019-12-18), the `pack.allowPackReuse` configuration option was introduced, allowing users to disable the pack reuse mechanism. To prepare for debugging multi-pack reuse, allow setting configuration to "single" in addition to the usual bool-or-int values. "single" implies the same behavior as "true", "1", "yes", and so on. But it will complement a new "multi" value (to be introduced in a future commit). When set to "single", we will only perform pack reuse on a single pack, regardless of whether or not there are multiple MIDX'd packs. This requires no code changes (yet), since we only support single pack reuse. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7aae9f104b..684698f679 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -229,7 +229,10 @@ static struct bitmap *reuse_packfile_bitmap; static int use_bitmap_index_default = 1; static int use_bitmap_index = -1; -static int allow_pack_reuse = 1; +static enum { + NO_PACK_REUSE = 0, + SINGLE_PACK_REUSE, +} allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, WRITE_BITMAP_QUIET, @@ -3244,7 +3247,17 @@ static int git_pack_config(const char *k, const char *v, return 0; } if (!strcmp(k, "pack.allowpackreuse")) { - allow_pack_reuse = git_config_bool(k, v); + int res = git_parse_maybe_bool_text(v); + if (res < 0) { + if (!strcasecmp(v, "single")) + allow_pack_reuse = SINGLE_PACK_REUSE; + else + die(_("invalid pack.allowPackReuse value: '%s'"), v); + } else if (res) { + allow_pack_reuse = SINGLE_PACK_REUSE; + } else { + allow_pack_reuse = NO_PACK_REUSE; + } return 0; } if (!strcmp(k, "pack.threads")) { @@ -3999,7 +4012,7 @@ static void loosen_unused_packed_objects(void) */ static int pack_options_allow_reuse(void) { - return allow_pack_reuse && + return allow_pack_reuse != NO_PACK_REUSE && pack_to_stdout && !ignore_packed_keep_on_disk && !ignore_packed_keep_in_core && -- cgit v1.2.3 From af626ac0e02570e3afac8b4238199157181d43c2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:44 -0500 Subject: pack-bitmap: enable reuse from all bitmapped packs Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple bitmapped packs for verbatim reuse, allow marking objects from all bitmapped packs as eligible for reuse. 
Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack contained in the MIDX as a reuse candidate. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 684698f679..5d3c42035b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -232,6 +232,7 @@ static int use_bitmap_index = -1; static enum { NO_PACK_REUSE = 0, SINGLE_PACK_REUSE, + MULTI_PACK_REUSE, } allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, @@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v, if (res < 0) { if (!strcasecmp(v, "single")) allow_pack_reuse = SINGLE_PACK_REUSE; + else if (!strcasecmp(v, "multi")) + allow_pack_reuse = MULTI_PACK_REUSE; else die(_("invalid pack.allowPackReuse value: '%s'"), v); } else if (res) { @@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, &reuse_packfiles_nr, - &reuse_packfile_bitmap); + &reuse_packfile_bitmap, + allow_pack_reuse == MULTI_PACK_REUSE); if (reuse_packfiles) { reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); -- cgit v1.2.3