Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff King <peff@peff.net>2021-02-23 05:25:20 +0300
committerJunio C Hamano <gitster@pobox.com>2021-02-23 10:30:52 +0300
commit6325da14af2b76585a72cf639fa08e4cb1a370c8 (patch)
tree604d80863a05933137da93fd2cb8919eca6016ec /builtin/pack-objects.c
parentfbf20aeeef062e8cbd7db43788cfd3a69764071f (diff)
builtin/pack-objects.c: rewrite honor-pack-keep logic
Now that we have find_kept_pack_entry(), we don't have to manually keep hunting through every pack to find a possible "kept" duplicate of the object. This should be faster, assuming only a portion of your total packs are actually kept. Note that we have to re-order the logic a bit here; we can deal with the disqualifying situations first (e.g., finding the object in a non-local pack with --local), then "kept" situation(s), and then just fall back to other "--local" conditions. Here are the results from p5303 (measurements again taken on the kernel): Test HEAD^ HEAD ----------------------------------------------------------------------------------------------- 5303.5: repack (1) 57.26(54.59+10.84) 57.34(54.66+10.88) +0.1% 5303.6: repack with kept (1) 57.33(54.80+10.51) 57.38(54.83+10.49) +0.1% 5303.11: repack (50) 71.54(88.57+4.84) 71.70(88.99+4.74) +0.2% 5303.12: repack with kept (50) 85.12(102.05+4.94) 72.58(89.61+4.78) -14.7% 5303.17: repack (1000) 216.87(490.79+14.57) 217.19(491.72+14.25) +0.1% 5303.18: repack with kept (1000) 665.63(938.87+15.76) 246.12(520.07+14.93) -63.0% and the --stdin-packs timings: 5303.7: repack with --stdin-packs (1) 0.01(0.01+0.00) 0.00(0.00+0.00) -100.0% 5303.13: repack with --stdin-packs (50) 3.53(12.07+0.24) 3.43(11.75+0.24) -2.8% 5303.19: repack with --stdin-packs (1000) 195.83(371.82+8.10) 130.50(307.15+7.66) -33.4% So our repack with an empty .keep pack is roughly as fast as one without a .keep pack up to 50 packs. But the --stdin-packs case scales a little better, too. Notably, it is faster than a repack of the same size and a kept pack. It looks at fewer objects, of course, but the penalty for looking at many packs isn't as costly. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r--builtin/pack-objects.c127
1 files changed, 76 insertions, 51 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6ee8e40665..8cb32763b7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid,
return 1;
}
-static int want_found_object(int exclude, struct packed_git *p)
+static int want_found_object(const struct object_id *oid, int exclude,
+ struct packed_git *p)
{
if (exclude)
return 1;
@@ -1204,27 +1205,82 @@ static int want_found_object(int exclude, struct packed_git *p)
* make sure no copy of this object appears in _any_ pack that makes us
* to omit the object, so we need to check all the packs.
*
- * We can however first check whether these options can possible matter;
+ * We can however first check whether these options can possibly matter;
* if they do not matter we know we want the object in generated pack.
* Otherwise, we signal "-1" at the end to tell the caller that we do
* not know either way, and it needs to check more packs.
*/
- if (!ignore_packed_keep_on_disk &&
- !ignore_packed_keep_in_core &&
- (!local || !have_non_local_packs))
- return 1;
+ /*
+ * Objects in packs borrowed from elsewhere are discarded regardless of
+ * if they appear in other packs that weren't borrowed.
+ */
if (local && !p->pack_local)
return 0;
- if (p->pack_local &&
- ((ignore_packed_keep_on_disk && p->pack_keep) ||
- (ignore_packed_keep_in_core && p->pack_keep_in_core)))
- return 0;
+
+ /*
+ * Then handle .keep first, as we have a fast(er) path there.
+ */
+ if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
+ /*
+ * Set the flags for the kept-pack cache to be the ones we want
+ * to ignore.
+ *
+ * That is, if we are ignoring objects in on-disk keep packs,
+ * then we want to search through the on-disk keep and ignore
+ * the in-core ones.
+ */
+ unsigned flags = 0;
+ if (ignore_packed_keep_on_disk)
+ flags |= ON_DISK_KEEP_PACKS;
+ if (ignore_packed_keep_in_core)
+ flags |= IN_CORE_KEEP_PACKS;
+
+ if (ignore_packed_keep_on_disk && p->pack_keep)
+ return 0;
+ if (ignore_packed_keep_in_core && p->pack_keep_in_core)
+ return 0;
+ if (has_object_kept_pack(oid, flags))
+ return 0;
+ }
+
+ /*
+ * At this point we know definitively that either we don't care about
+ * keep-packs, or the object is not in one. Keep checking other
+ * conditions...
+ */
+ if (!local || !have_non_local_packs)
+ return 1;
/* we don't know yet; keep looking for more packs */
return -1;
}
+static int want_object_in_pack_one(struct packed_git *p,
+ const struct object_id *oid,
+ int exclude,
+ struct packed_git **found_pack,
+ off_t *found_offset)
+{
+ off_t offset;
+
+ if (p == *found_pack)
+ offset = *found_offset;
+ else
+ offset = find_pack_entry_one(oid->hash, p);
+
+ if (offset) {
+ if (!*found_pack) {
+ if (!is_pack_valid(p))
+ return -1;
+ *found_offset = offset;
+ *found_pack = p;
+ }
+ return want_found_object(oid, exclude, p);
+ }
+ return -1;
+}
+
/*
* Check whether we want the object in the pack (e.g., we do not want
* objects found in non-local stores if the "--local" option was used).
@@ -1252,7 +1308,7 @@ static int want_object_in_pack(const struct object_id *oid,
* are present we will determine the answer right now.
*/
if (*found_pack) {
- want = want_found_object(exclude, *found_pack);
+ want = want_found_object(oid, exclude, *found_pack);
if (want != -1)
return want;
}
@@ -1260,51 +1316,20 @@ static int want_object_in_pack(const struct object_id *oid,
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
struct pack_entry e;
if (fill_midx_entry(the_repository, oid, &e, m)) {
- struct packed_git *p = e.p;
- off_t offset;
-
- if (p == *found_pack)
- offset = *found_offset;
- else
- offset = find_pack_entry_one(oid->hash, p);
-
- if (offset) {
- if (!*found_pack) {
- if (!is_pack_valid(p))
- continue;
- *found_offset = offset;
- *found_pack = p;
- }
- want = want_found_object(exclude, p);
- if (want != -1)
- return want;
- }
+ want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
+ if (want != -1)
+ return want;
}
}
list_for_each(pos, get_packed_git_mru(the_repository)) {
struct packed_git *p = list_entry(pos, struct packed_git, mru);
- off_t offset;
-
- if (p == *found_pack)
- offset = *found_offset;
- else
- offset = find_pack_entry_one(oid->hash, p);
-
- if (offset) {
- if (!*found_pack) {
- if (!is_pack_valid(p))
- continue;
- *found_offset = offset;
- *found_pack = p;
- }
- want = want_found_object(exclude, p);
- if (!exclude && want > 0)
- list_move(&p->mru,
- get_packed_git_mru(the_repository));
- if (want != -1)
- return want;
- }
+ want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
+ if (!exclude && want > 0)
+ list_move(&p->mru,
+ get_packed_git_mru(the_repository));
+ if (want != -1)
+ return want;
}
if (uri_protocols.nr) {