Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2016-10-11 00:03:46 +0300
committerJunio C Hamano <gitster@pobox.com>2016-10-11 00:03:47 +0300
commite6e24c94df9df6d39f2316113c14fe07d2ab03d7 (patch)
tree1f883be2ac3f3a84ce5af3cd1e9e32c45648e21e /builtin
parentb8688adb12d086b161aa7c369126bdd56843a01b (diff)
parentc9af708b1a5a92cbde2a0c9a75b13530f792ac84 (diff)
Merge branch 'jk/pack-objects-optim-mru'
"git pack-objects" in a repository with many packfiles used to spend a lot of time looking for/at objects in them; the accesses to the packfiles are now optimized by checking the most-recently-used packfile first. * jk/pack-objects-optim-mru: pack-objects: use mru list when iterating over packs pack-objects: break delta cycles before delta-search phase sha1_file: make packed_object_info public provide an initializer for "struct object_info"
Diffstat (limited to 'builtin')
-rw-r--r--builtin/cat-file.c5
-rw-r--r--builtin/pack-objects.c92
2 files changed, 92 insertions, 5 deletions
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index cca97a86c0..30383e9eb4 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -53,7 +53,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
char *buf;
unsigned long size;
struct object_context obj_context;
- struct object_info oi = {NULL};
+ struct object_info oi = OBJECT_INFO_INIT;
struct strbuf sb = STRBUF_INIT;
unsigned flags = LOOKUP_REPLACE_OBJECT;
const char *path = force_path;
@@ -449,8 +449,7 @@ static int batch_objects(struct batch_options *opt)
data.split_on_whitespace = 1;
if (opt->all_objects) {
- struct object_info empty;
- memset(&empty, 0, sizeof(empty));
+ struct object_info empty = OBJECT_INFO_INIT;
if (!memcmp(&data.info, &empty, sizeof(empty)))
data.skip_object_info = 1;
}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8aeba6a6e1..1e7c2a98a5 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -23,6 +23,7 @@
#include "reachable.h"
#include "sha1-array.h"
#include "argv-array.h"
+#include "mru.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@@ -994,7 +995,7 @@ static int want_object_in_pack(const unsigned char *sha1,
struct packed_git **found_pack,
off_t *found_offset)
{
- struct packed_git *p;
+ struct mru_entry *entry;
int want;
if (!exclude && local && has_loose_object_nonlocal(sha1))
@@ -1011,7 +1012,8 @@ static int want_object_in_pack(const unsigned char *sha1,
return want;
}
- for (p = packed_git; p; p = p->next) {
+ for (entry = packed_git_mru->head; entry; entry = entry->next) {
+ struct packed_git *p = entry->item;
off_t offset;
if (p == *found_pack)
@@ -1027,6 +1029,8 @@ static int want_object_in_pack(const unsigned char *sha1,
*found_pack = p;
}
want = want_found_object(exclude, p);
+ if (!exclude && want > 0)
+ mru_mark(packed_git_mru, entry);
if (want != -1)
return want;
}
@@ -1527,6 +1531,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
(a->in_pack_offset > b->in_pack_offset);
}
+/*
+ * Drop an on-disk delta we were planning to reuse. Naively, this would
+ * just involve blanking out the "delta" field, but we have to deal
+ * with some extra book-keeping:
+ *
+ * 1. Removing ourselves from the delta_sibling linked list.
+ *
+ * 2. Updating our size/type to the non-delta representation. These were
+ * either not recorded initially (size) or overwritten with the delta type
+ * (type) when check_object() decided to reuse the delta.
+ */
+static void drop_reused_delta(struct object_entry *entry)
+{
+ struct object_entry **p = &entry->delta->delta_child;
+ struct object_info oi = OBJECT_INFO_INIT;
+
+ while (*p) {
+ if (*p == entry)
+ *p = (*p)->delta_sibling;
+ else
+ p = &(*p)->delta_sibling;
+ }
+ entry->delta = NULL;
+
+ oi.sizep = &entry->size;
+ oi.typep = &entry->type;
+ if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
+ /*
+ * We failed to get the info from this pack for some reason;
+ * fall back to sha1_object_info, which may find another copy.
+ * And if that fails, the error will be recorded in entry->type
+ * and dealt with in prepare_pack().
+ */
+ entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
+ }
+}
+
+/*
+ * Follow the chain of deltas from this entry onward, throwing away any links
+ * that cause us to hit a cycle (as determined by the DFS state flags in
+ * the entries).
+ */
+static void break_delta_chains(struct object_entry *entry)
+{
+ /* If it's not a delta, it can't be part of a cycle. */
+ if (!entry->delta) {
+ entry->dfs_state = DFS_DONE;
+ return;
+ }
+
+ switch (entry->dfs_state) {
+ case DFS_NONE:
+ /*
+ * This is the first time we've seen the object. We mark it as
+ * part of the active potential cycle and recurse.
+ */
+ entry->dfs_state = DFS_ACTIVE;
+ break_delta_chains(entry->delta);
+ entry->dfs_state = DFS_DONE;
+ break;
+
+ case DFS_DONE:
+ /* object already examined, and not part of a cycle */
+ break;
+
+ case DFS_ACTIVE:
+ /*
+ * We found a cycle that needs broken. It would be correct to
+ * break any link in the chain, but it's convenient to
+ * break this one.
+ */
+ drop_reused_delta(entry);
+ entry->dfs_state = DFS_DONE;
+ break;
+ }
+}
+
static void get_object_details(void)
{
uint32_t i;
@@ -1544,6 +1625,13 @@ static void get_object_details(void)
entry->no_try_delta = 1;
}
+ /*
+ * This must happen in a second pass, since we rely on the delta
+ * information for the whole list being completed.
+ */
+ for (i = 0; i < to_pack.nr_objects; i++)
+ break_delta_chains(&to_pack.objects[i]);
+
free(sorted_by_offset);
}