Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- midx.c 128
-rw-r--r-- packfile.c 17
-rw-r--r-- packfile.h 2
3 files changed, 147 insertions, 0 deletions
diff --git a/midx.c b/midx.c
index fcdf6553ce..29f8de5ee6 100644
--- a/midx.c
+++ b/midx.c
@@ -4,6 +4,7 @@
#include "lockfile.h"
#include "packfile.h"
#include "object-store.h"
+#include "packfile.h"
#include "midx.h"
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
@@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
packs->list[packs->nr] = add_packed_git(full_path,
full_path_len,
0);
+
if (!packs->list[packs->nr]) {
warning(_("failed to add packfile '%s'"),
full_path);
return;
}
+ if (open_pack_index(packs->list[packs->nr])) {
+ warning(_("failed to open pack-index '%s'"),
+ full_path);
+ close_pack(packs->list[packs->nr]);
+ FREE_AND_NULL(packs->list[packs->nr]);
+ return;
+ }
+
packs->names[packs->nr] = xstrdup(file_name);
packs->pack_name_concat_len += strlen(file_name) + 1;
packs->nr++;
@@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
free(pairs);
}
+/*
+ * One object as collected for the multi-pack-index: its object id plus
+ * where it was found. Filled in by fill_pack_entry() and ordered by
+ * midx_oid_compare().
+ */
+struct pack_midx_entry {
+ /* object id, read from the pack index by nth_packed_object_oid() */
+ struct object_id oid;
+ /* id of the containing pack, as passed to fill_pack_entry() */
+ uint32_t pack_int_id;
+ /* mtime of the containing pack; newer packs sort first among duplicates */
+ time_t pack_mtime;
+ /* result of nth_packed_object_offset() for this object in that pack */
+ uint64_t offset;
+};
+
+/*
+ * QSORT comparison callback for arrays of 'struct pack_midx_entry'.
+ *
+ * Entries are ordered by object id first. Entries with the same id are
+ * ordered by pack mtime, newest first, so the copy from the most recently
+ * modified pack sorts ahead of the others. Any remaining tie is broken by
+ * ascending pack_int_id.
+ *
+ * Returns a negative value, zero, or a positive value when 'a' sorts
+ * before, equal to, or after 'b'.
+ */
+static int midx_oid_compare(const void *_a, const void *_b)
+{
+	const struct pack_midx_entry *a = _a;
+	const struct pack_midx_entry *b = _b;
+	int cmp = oidcmp(&a->oid, &b->oid);
+
+	if (cmp)
+		return cmp;
+
+	/* Newer pack first: a larger mtime compares as "smaller". */
+	if (a->pack_mtime > b->pack_mtime)
+		return -1;
+	else if (a->pack_mtime < b->pack_mtime)
+		return 1;
+
+	/*
+	 * Compare explicitly instead of subtracting: the ids are unsigned
+	 * 32-bit values, so their difference does not reliably fit in an
+	 * int with the correct sign.
+	 */
+	if (a->pack_int_id < b->pack_int_id)
+		return -1;
+	return a->pack_int_id > b->pack_int_id;
+}
+
+/*
+ * Fill 'entry' with the data for object number 'cur_object' of pack 'p':
+ * the object id read from the pack index, the pack's mtime, the offset
+ * reported by nth_packed_object_offset(), and 'pack_int_id' as the pack
+ * identifier.
+ *
+ * Dies if the object id cannot be read from the pack index.
+ */
+static void fill_pack_entry(uint32_t pack_int_id,
+			    struct packed_git *p,
+			    uint32_t cur_object,
+			    struct pack_midx_entry *entry)
+{
+	/* cur_object is unsigned, so it is printed with an unsigned conversion. */
+	if (!nth_packed_object_oid(&entry->oid, p, cur_object))
+		die(_("failed to locate object %u in packfile"),
+		    (unsigned int)cur_object);
+
+	entry->pack_int_id = pack_int_id;
+	entry->pack_mtime = p->mtime;
+
+	entry->offset = nth_packed_object_offset(p, cur_object);
+}
+
+/*
+ * Build the sorted, duplicate-free list of objects found in the given packs.
+ *
+ * Several packs may hold copies of the same object, and one list holding
+ * every copy before de-duplication could put a lot of pressure on memory.
+ * To limit that without a significant performance drop, objects are
+ * processed one first-byte bucket at a time: each pack's IDX fanout table
+ * gives the range of objects belonging to the bucket, those are copied
+ * into a scratch array and sorted, and only one entry per object id (the
+ * one that sorts first, i.e. from the most recently modified pack) is
+ * appended to the result.
+ *
+ * 'perm[i]' is the pack id recorded for objects taken from 'p[i]'. The
+ * number of entries in the returned array is stored in '*nr_objects'.
+ * The returned array is heap-allocated; write_midx_file() frees it.
+ */
+static struct pack_midx_entry *get_sorted_entries(struct packed_git **p,
+						  uint32_t *perm,
+						  uint32_t nr_packs,
+						  uint32_t *nr_objects)
+{
+	struct pack_midx_entry *bucket = NULL;
+	struct pack_midx_entry *result = NULL;
+	uint32_t bucket_alloc, result_alloc;
+	uint32_t object_sum = 0;
+	uint32_t first_byte, pack_idx, i;
+
+	for (pack_idx = 0; pack_idx < nr_packs; pack_idx++)
+		object_sum += p[pack_idx]->num_objects;
+
+	/*
+	 * Buckets are expected to be roughly equal in size, give or take
+	 * some noise, so start both arrays a little above 1/256 of the
+	 * total. They grow on demand.
+	 */
+	if (object_sum > 3200)
+		bucket_alloc = object_sum / 200;
+	else
+		bucket_alloc = 16;
+	result_alloc = bucket_alloc;
+
+	ALLOC_ARRAY(bucket, bucket_alloc);
+	ALLOC_ARRAY(result, result_alloc);
+	*nr_objects = 0;
+
+	for (first_byte = 0; first_byte < 256; first_byte++) {
+		uint32_t bucket_nr = 0;
+
+		/* Gather this bucket's objects from every pack. */
+		for (pack_idx = 0; pack_idx < nr_packs; pack_idx++) {
+			struct packed_git *pack = p[pack_idx];
+			uint32_t first = first_byte ?
+				get_pack_fanout(pack, first_byte - 1) : 0;
+			uint32_t limit = get_pack_fanout(pack, first_byte);
+
+			for (i = first; i < limit; i++) {
+				ALLOC_GROW(bucket, bucket_nr + 1, bucket_alloc);
+				fill_pack_entry(perm[pack_idx], pack, i,
+						&bucket[bucket_nr]);
+				bucket_nr++;
+			}
+		}
+
+		QSORT(bucket, bucket_nr, midx_oid_compare);
+
+		/*
+		 * Equal ids are now adjacent, with the copy from the newest
+		 * pack first. Keep an entry only when its id differs from
+		 * the one just before it.
+		 */
+		for (i = 0; i < bucket_nr; i++) {
+			if (i == 0 ||
+			    oidcmp(&bucket[i - 1].oid, &bucket[i].oid)) {
+				ALLOC_GROW(result, *nr_objects + 1, result_alloc);
+				memcpy(&result[*nr_objects],
+				       &bucket[i],
+				       sizeof(struct pack_midx_entry));
+				(*nr_objects)++;
+			}
+		}
+	}
+
+	free(bucket);
+	return result;
+}
+
static size_t write_midx_pack_names(struct hashfile *f,
char **pack_names,
uint32_t num_packs)
@@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir)
uint64_t written = 0;
uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
+ uint32_t nr_entries;
+ struct pack_midx_entry *entries = NULL;
midx_name = get_midx_filename(object_dir);
if (safe_create_leading_directories(midx_name)) {
@@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir)
ALLOC_ARRAY(pack_perm, packs.nr);
sort_packs_by_name(packs.names, packs.nr, pack_perm);
+ entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries);
+
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
FREE_AND_NULL(midx_name);
@@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir)
free(packs.list);
free(packs.names);
+ free(entries);
return 0;
}
diff --git a/packfile.c b/packfile.c
index ee1ab9b804..3d652212c6 100644
--- a/packfile.c
+++ b/packfile.c
@@ -196,6 +196,23 @@ int open_pack_index(struct packed_git *p)
return ret;
}
+/*
+ * Return entry 'value' of the fanout table in the index of pack 'p',
+ * converted from network to host byte order.
+ *
+ * The pack index is opened first if its data is not loaded yet; when that
+ * fails, 0 is returned. 'value' is used as an array index without a range
+ * check.
+ */
+uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
+{
+	const uint32_t *fanout;
+
+	/* Load the index on demand; give up with 0 if it cannot be opened. */
+	if (!p->index_data && open_pack_index(p))
+		return 0;
+
+	fanout = p->index_data;
+
+	/*
+	 * For index versions above 1 the table starts two 32-bit words
+	 * into the index data (presumably past a header; see the pack
+	 * index format documentation).
+	 */
+	if (p->index_version > 1)
+		fanout += 2;
+
+	return ntohl(fanout[value]);
+}
+
static struct packed_git *alloc_packed_git(int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
diff --git a/packfile.h b/packfile.h
index d2ad30300a..b0eed44c0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -69,6 +69,8 @@ extern int open_pack_index(struct packed_git *);
*/
extern void close_pack_index(struct packed_git *);
+/*
+ * Return entry 'value' of the pack index fanout table of 'p', converted
+ * to host byte order. Opens the pack index if it is not loaded yet and
+ * returns 0 if that fails. 'value' is not range-checked.
+ */
+extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
+
extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
extern void close_pack_windows(struct packed_git *);
extern void close_pack(struct packed_git *);