Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHan Xin <hanxin.hx@alibaba-inc.com>2022-06-11 05:44:16 +0300
committerJunio C Hamano <gitster@pobox.com>2022-06-13 20:22:35 +0300
commita1bf5ca29f7ebb41e24318147518fc7c738303e5 (patch)
tree12bd174f216456e38cb12cd4624b59b3bd0335b9 /builtin/unpack-objects.c
parentab336e8f1c8009c8b1aab8deb592148e69217085 (diff)
unpack-objects: low memory footprint for get_data() in dry_run mode
As the name implies, "get_data(size)" will allocate and return a given amount of memory. Allocating memory for a large blob object may cause the system to run out of memory. Before preparing to replace calling of "get_data()" to unpack large blob objects in latter commits, refactor "get_data()" to reduce memory footprint for dry_run mode. Because in dry_run mode, "get_data()" is only used to check the integrity of data, and the returned buffer is not used at all, we can allocate a smaller buffer and use it as zstream output. Make the function return NULL in the dry-run mode, as no callers use the returned buffer. The "find [...]objects/?? -type f | wc -l" test idiom being used here is adapted from the same "find" use added to another test in d9545c7f465 (fast-import: implement unpack limit, 2016-04-25). Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com> Signed-off-by: Han Xin <chiyutianyi@gmail.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/unpack-objects.c')
-rw-r--r--builtin/unpack-objects.c37
1 files changed, 26 insertions, 11 deletions
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 56d05e2725..32e8b47059 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -97,15 +97,27 @@ static void use(int bytes)
display_throughput(progress, consumed_bytes);
}
+/*
+ * Decompress zstream from the standard input into a newly
+ * allocated buffer of specified size and return the buffer.
+ * The caller is responsible to free the returned buffer.
+ *
+ * But for dry_run mode, "get_data()" is only used to check the
+ * integrity of data, and the returned buffer is not used at all.
+ * Therefore, in dry_run mode, "get_data()" will release the small
+ * allocated buffer which is reused to hold temporary zstream output
+ * and return NULL instead of returning garbage data.
+ */
static void *get_data(unsigned long size)
{
git_zstream stream;
- void *buf = xmallocz(size);
+ unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
+ void *buf = xmallocz(bufsize);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
- stream.avail_out = size;
+ stream.avail_out = bufsize;
stream.next_in = fill(1);
stream.avail_in = len;
git_inflate_init(&stream);
@@ -125,8 +137,17 @@ static void *get_data(unsigned long size)
}
stream.next_in = fill(1);
stream.avail_in = len;
+ if (dry_run) {
+ /* reuse the buffer in dry_run mode */
+ stream.next_out = buf;
+ stream.avail_out = bufsize > size - stream.total_out ?
+ size - stream.total_out :
+ bufsize;
+ }
}
git_inflate_end(&stream);
+ if (dry_run)
+ FREE_AND_NULL(buf);
return buf;
}
@@ -326,10 +347,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
{
void *buf = get_data(size);
- if (!dry_run && buf)
+ if (buf)
write_object(nr, type, buf, size);
- else
- free(buf);
}
static int resolve_against_held(unsigned nr, const struct object_id *base,
@@ -359,10 +378,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
oidread(&base_oid, fill(the_hash_algo->rawsz));
use(the_hash_algo->rawsz);
delta_data = get_data(delta_size);
- if (dry_run || !delta_data) {
- free(delta_data);
+ if (!delta_data)
return;
- }
if (has_object_file(&base_oid))
; /* Ok we have this one */
else if (resolve_against_held(nr, &base_oid,
@@ -398,10 +415,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
die("offset value out of bound for delta base object");
delta_data = get_data(delta_size);
- if (dry_run || !delta_data) {
- free(delta_data);
+ if (!delta_data)
return;
- }
lo = 0;
hi = nr;
while (lo < hi) {