From f41aebd46998c8b6df3733196d55aeb7162b3df6 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 13 Jul 2008 22:07:44 -0400 Subject: index-pack: Refactor base arguments of resolve_delta into a struct We need to discard base objects which are not recently used if our memory gets low, such as when we are unpacking a long delta chain of a very large object. To support tracking the available base objects we combine the pointer and size into a struct. Future changes would allow the data pointer to be free'd and marked NULL if memory gets low. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- index-pack.c | 60 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 5ac91baf98..8162265a63 100644 --- a/index-pack.c +++ b/index-pack.c @@ -26,6 +26,11 @@ union delta_base { off_t offset; }; +struct base_data { + void *data; + unsigned long size; +}; + /* * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want * to memcmp() only the first 20 bytes. @@ -425,25 +430,25 @@ static void sha1_object(const void *data, unsigned long size, } } -static void resolve_delta(struct object_entry *delta_obj, void *base_data, - unsigned long base_size, enum object_type type) +static void resolve_delta(struct object_entry *delta_obj, + struct base_data *base_obj, enum object_type type) { void *delta_data; unsigned long delta_size; - void *result; - unsigned long result_size; union delta_base delta_base; int j, first, last; + struct base_data result; delta_obj->real_type = type; delta_data = get_data_from_pack(delta_obj); delta_size = delta_obj->size; - result = patch_delta(base_data, base_size, delta_data, delta_size, - &result_size); + result.data = patch_delta(base_obj->data, base_obj->size, + delta_data, delta_size, + &result.size); free(delta_data); - if (!result) + if (!result.data) bad_object(delta_obj->idx.offset, "failed to apply delta"); - sha1_object(result, result_size, type, delta_obj->idx.sha1); + sha1_object(result.data, result.size, type, delta_obj->idx.sha1); nr_resolved_deltas++; hashcpy(delta_base.sha1, delta_obj->idx.sha1); @@ -451,7 +456,7 @@ static void resolve_delta(struct object_entry *delta_obj, void *base_data, for (j = first; j <= last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, result, result_size, type); + resolve_delta(child, &result, type); } } @@ -461,11 +466,11 @@ static void resolve_delta(struct object_entry *delta_obj, void *base_data, for (j = first; j <= last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, result, result_size, type); + resolve_delta(child, &result, type); } } - free(result); + free(result.data); } static int compare_delta_entry(const void *a, const void *b) @@ -480,7 +485,6 @@ static void parse_pack_objects(unsigned char *sha1) { int i; struct delta_entry *delta = deltas; - void *data; struct stat st; /* @@ -495,7 +499,7 @@ static void parse_pack_objects(unsigned char *sha1) nr_objects); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - data = unpack_raw_entry(obj, &delta->base); + void *data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { nr_deltas++; @@ -544,6 +548,7 @@ static void parse_pack_objects(unsigned char *sha1) struct object_entry *obj = &objects[i]; union delta_base base; int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; + struct base_data base_obj; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; @@ -554,22 +559,22 @@ static void parse_pack_objects(unsigned char *sha1) ofs = !find_delta_children(&base, &ofs_first, &ofs_last); if (!ref && !ofs) continue; - data = get_data_from_pack(obj); + base_obj.data = get_data_from_pack(obj); + base_obj.size = obj->size; + if (ref) for (j = ref_first; j <= ref_last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, data, - obj->size, obj->type); + resolve_delta(child, &base_obj, obj->type); } if (ofs) for (j = ofs_first; j <= ofs_last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, data, - obj->size, obj->type); + resolve_delta(child, &base_obj, obj->type); } - free(data); + free(base_obj.data); display_progress(progress, nr_resolved_deltas); } } @@ -655,28 +660,29 @@ static void fix_unresolved_deltas(int nr_unresolved) for (i = 0; i < n; i++) { struct delta_entry *d = sorted_by_pos[i]; - void *data; - unsigned long size; enum object_type type; int j, first, last; + struct base_data base_obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - data = read_sha1_file(d->base.sha1, &type, &size); - if (!data) + base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size); + if (!base_obj.data) continue; find_delta_children(&d->base, &first, &last); for (j = first; j <= last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, data, size, type); + resolve_delta(child, &base_obj, type); } - if (check_sha1_signature(d->base.sha1, data, size, typename(type))) + if (check_sha1_signature(d->base.sha1, base_obj.data, + base_obj.size, typename(type))) die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); - append_obj_to_pack(d->base.sha1, data, size, type); - free(data); + append_obj_to_pack(d->base.sha1, base_obj.data, + base_obj.size, type); + free(base_obj.data); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); -- cgit v1.2.3 From 4a438cabacdbfa71f59dad127d436bbb49a86f35 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 13 Jul 2008 22:07:45 -0400 Subject: index-pack: Chain the struct base_data on the stack for traversal We need to release earlier inflated base objects when memory gets low, which means we need to be able to walk up or down the stack to locate the objects we want to release, and free their data. The new link/unlink routines allow inserting and removing the struct base_data during recursion inside resolve_delta, and the global base_cache gives us the head of the chain (bottom of the stack) so we can traverse it. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- index-pack.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 8162265a63..85e20cd04b 100644 --- a/index-pack.c +++ b/index-pack.c @@ -27,6 +27,8 @@ union delta_base { }; struct base_data { + struct base_data *base; + struct base_data *child; void *data; unsigned long size; }; @@ -48,6 +50,7 @@ struct delta_entry static struct object_entry *objects; static struct delta_entry *deltas; +static struct base_data *base_cache; static int nr_objects; static int nr_deltas; static int nr_resolved_deltas; @@ -215,6 +218,27 @@ static void bad_object(unsigned long offset, const char *format, ...) die("pack has bad object at offset %lu: %s", offset, buf); } +static void link_base_data(struct base_data *base, struct base_data *c) +{ + if (base) + base->child = c; + else + base_cache = c; + + c->base = base; + c->child = NULL; +} + +static void unlink_base_data(struct base_data *c) +{ + struct base_data *base = c->base; + if (base) + base->child = NULL; + else + base_cache = NULL; + free(c->data); +} + static void *unpack_entry_data(unsigned long offset, unsigned long size) { z_stream stream; @@ -451,6 +475,8 @@ static void resolve_delta(struct object_entry *delta_obj, sha1_object(result.data, result.size, type, delta_obj->idx.sha1); nr_resolved_deltas++; + link_base_data(base_obj, &result); + hashcpy(delta_base.sha1, delta_obj->idx.sha1); if (!find_delta_children(&delta_base, &first, &last)) { for (j = first; j <= last; j++) { @@ -470,7 +496,7 @@ static void resolve_delta(struct object_entry *delta_obj, } } - free(result.data); + unlink_base_data(&result); } static int compare_delta_entry(const void *a, const void *b) @@ -561,6 +587,7 @@ static void parse_pack_objects(unsigned char *sha1) continue; base_obj.data = get_data_from_pack(obj); base_obj.size = obj->size; + link_base_data(NULL, &base_obj); if (ref) for (j = ref_first; j <= ref_last; j++) { @@ -574,7 +601,7 @@ static void parse_pack_objects(unsigned char *sha1) if (child->real_type == OBJ_OFS_DELTA) resolve_delta(child, &base_obj, obj->type); } - free(base_obj.data); + unlink_base_data(&base_obj); display_progress(progress, nr_resolved_deltas); } } @@ -669,6 +696,7 @@ static void fix_unresolved_deltas(int nr_unresolved) base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size); if (!base_obj.data) continue; + link_base_data(NULL, &base_obj); find_delta_children(&d->base, &first, &last); for (j = first; j <= last; j++) { @@ -682,7 +710,7 @@ static void fix_unresolved_deltas(int nr_unresolved) die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); append_obj_to_pack(d->base.sha1, base_obj.data, base_obj.size, type); - free(base_obj.data); + unlink_base_data(&base_obj); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); -- cgit v1.2.3 From 03993e139cafd04c3783902243328442a5b4aa2c Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 13 Jul 2008 22:07:46 -0400 Subject: index-pack: Track the object_entry that creates each base_data If we free the data stored within a base_data we need the struct object_entry to get the data back again for use with another dependent delta. Storing the object_entry* in base_data makes it simple to call get_data_from_pack() to recover the compressed information. This however means that we must add the missing base object to the end of our packfile prior to calling resolve_delta() on each of the dependent deltas. Adding the base first ensures we can read the base back from the pack we are indexing, as if it had been included by the remote side. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- index-pack.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 85e20cd04b..16cbf304e3 100644 --- a/index-pack.c +++ b/index-pack.c @@ -29,6 +29,7 @@ union delta_base { struct base_data { struct base_data *base; struct base_data *child; + struct object_entry *obj; void *data; unsigned long size; }; @@ -475,6 +476,7 @@ static void resolve_delta(struct object_entry *delta_obj, sha1_object(result.data, result.size, type, delta_obj->idx.sha1); nr_resolved_deltas++; + result.obj = delta_obj; link_base_data(base_obj, &result); hashcpy(delta_base.sha1, delta_obj->idx.sha1); @@ -587,6 +589,7 @@ static void parse_pack_objects(unsigned char *sha1) continue; base_obj.data = get_data_from_pack(obj); base_obj.size = obj->size; + base_obj.obj = obj; link_base_data(NULL, &base_obj); if (ref) @@ -632,7 +635,8 @@ static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_c return size; } -static void append_obj_to_pack(const unsigned char *sha1, void *buf, +static struct object_entry *append_obj_to_pack( + const unsigned char *sha1, void *buf, unsigned long size, enum object_type type) { struct object_entry *obj = &objects[nr_objects++]; @@ -653,6 +657,7 @@ static void append_obj_to_pack(const unsigned char *sha1, void *buf, obj[1].idx.offset = obj[0].idx.offset + n; obj[1].idx.offset += write_compressed(output_fd, buf, size, &obj[0].idx.crc32); hashcpy(obj->idx.sha1, sha1); + return obj; } static int delta_pos_compare(const void *_a, const void *_b) @@ -696,6 +701,12 @@ static void fix_unresolved_deltas(int nr_unresolved) base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size); if (!base_obj.data) continue; + + if (check_sha1_signature(d->base.sha1, base_obj.data, + base_obj.size, typename(type))) + die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); + base_obj.obj = append_obj_to_pack(d->base.sha1, base_obj.data, + base_obj.size, type); link_base_data(NULL, &base_obj); find_delta_children(&d->base, &first, &last); @@ -705,11 +716,6 @@ static void fix_unresolved_deltas(int nr_unresolved) resolve_delta(child, &base_obj, type); } - if (check_sha1_signature(d->base.sha1, base_obj.data, - base_obj.size, typename(type))) - die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); - append_obj_to_pack(d->base.sha1, base_obj.data, - base_obj.size, type); unlink_base_data(&base_obj); display_progress(progress, nr_resolved_deltas); } -- cgit v1.2.3 From 92392b4a4530056918174988200d7c10286a4acd Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 15 Jul 2008 04:45:34 +0000 Subject: index-pack: Honor core.deltaBaseCacheLimit when resolving deltas If we are trying to resolve deltas for a long delta chain composed of multi-megabyte objects we can easily run into requiring 500M+ of memory to hold each object in the chain on the call stack while we recurse into the dependent objects and resolve them. We now use a simple delta cache that discards objects near the bottom of the call stack first, as they are the most least recently used objects in this current delta chain. If we recurse out of a chain we may find the base object is no longer available, as it was free'd to keep memory under the deltaBaseCacheLimit. In such cases we must unpack the base object again, which will require recursing back to the root of the top of the delta chain as we released that root first. The astute reader will probably realize that we can still exceed the delta base cache limit, but this happens only if the most recent base plus the delta plus the inflated dependent sum up to more than the base cache limit. Due to the way patch_delta is currently implemented we cannot operate in less memory anyway. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- index-pack.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 16cbf304e3..c359f8c9df 100644 --- a/index-pack.c +++ b/index-pack.c @@ -52,6 +52,7 @@ struct delta_entry static struct object_entry *objects; static struct delta_entry *deltas; static struct base_data *base_cache; +static size_t base_cache_used; static int nr_objects; static int nr_deltas; static int nr_resolved_deltas; @@ -219,6 +220,20 @@ static void bad_object(unsigned long offset, const char *format, ...) die("pack has bad object at offset %lu: %s", offset, buf); } +static void prune_base_data(struct base_data *retain) +{ + struct base_data *b = base_cache; + for (b = base_cache; + base_cache_used > delta_base_cache_limit && b; + b = b->child) { + if (b->data && b != retain) { + free(b->data); + b->data = NULL; + base_cache_used -= b->size; + } + } +} + static void link_base_data(struct base_data *base, struct base_data *c) { if (base) @@ -228,6 +243,8 @@ static void link_base_data(struct base_data *base, struct base_data *c) c->base = base; c->child = NULL; + base_cache_used += c->size; + prune_base_data(c); } static void unlink_base_data(struct base_data *c) @@ -237,7 +254,10 @@ static void unlink_base_data(struct base_data *c) base->child = NULL; else base_cache = NULL; - free(c->data); + if (c->data) { + free(c->data); + base_cache_used -= c->size; + } } static void *unpack_entry_data(unsigned long offset, unsigned long size) @@ -455,6 +475,30 @@ static void sha1_object(const void *data, unsigned long size, } } +static void *get_base_data(struct base_data *c) +{ + if (!c->data) { + struct object_entry *obj = c->obj; + + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + void *base = get_base_data(c->base); + void *raw = get_data_from_pack(obj); + c->data = patch_delta( + base, c->base->size, + raw, obj->size, + &c->size); + free(raw); + if (!c->data) + bad_object(obj->idx.offset, "failed to apply delta"); + } else + c->data = get_data_from_pack(obj); + + base_cache_used += c->size; + prune_base_data(c); + } + return c->data; +} + static void resolve_delta(struct object_entry *delta_obj, struct base_data *base_obj, enum object_type type) { @@ -467,7 +511,7 @@ static void resolve_delta(struct object_entry *delta_obj, delta_obj->real_type = type; delta_data = get_data_from_pack(delta_obj); delta_size = delta_obj->size; - result.data = patch_delta(base_obj->data, base_obj->size, + result.data = patch_delta(get_base_data(base_obj), base_obj->size, delta_data, delta_size, &result.size); free(delta_data); -- cgit v1.2.3