From 0660626caff6ac3745cd7b7908a2ca540141a2ec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 5 May 2006 02:41:53 -0700 Subject: binary diff: further updates. This updates the user interface and generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym "apply --binary". * After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to postimage. Purely for implementation convenience, I added the length of the data before deflation (i.e. the inflated size) after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcome. Signed-off-by: Junio C Hamano --- diff.c | 134 ++++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 71 insertions(+), 63 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index b14d897f19..bfe54c3e09 100644 --- a/diff.c +++ b/diff.c @@ -392,78 +392,78 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } -static void *encode_delta_size(void *data, unsigned long size) +static unsigned char *deflate_it(char *data, + unsigned long size, + unsigned long *result_size) { - unsigned char *cp = data; - *cp++ = size; - size >>= 7; - while (size) { - cp[-1] |= 0x80; - *cp++ = size; - size >>= 7; - } - return cp; + int bound; + unsigned char *deflated; + z_stream stream; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, Z_BEST_COMPRESSION); + bound = deflateBound(&stream, size); + deflated = xmalloc(bound); + stream.next_out = deflated; + stream.avail_out = bound; + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + 
while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + *result_size = stream.total_out; + return deflated; } -static void *safe_diff_delta(const unsigned char *src, unsigned long src_size, - const unsigned char *dst, unsigned long dst_size, - unsigned long *delta_size) +static void emit_binary_diff(mmfile_t *one, mmfile_t *two) { - unsigned long bufsize; - unsigned char *data; - unsigned char *cp; - - if (src_size && dst_size) - return diff_delta(src, src_size, dst, dst_size, delta_size, 0); + void *cp; + void *delta; + void *deflated; + void *data; + unsigned long orig_size; + unsigned long delta_size; + unsigned long deflate_size; + unsigned long data_size; - /* diff-delta does not like to do delta with empty, so - * we do that by hand here. Sigh... + printf("GIT binary patch\n"); + /* We could do deflated delta, or we could do just deflated two, + * whichever is smaller. */ - - if (!src_size) - /* literal copy can be done only 127-byte at a time. - */ - bufsize = dst_size + (dst_size / 127) + 40; - else - bufsize = 40; - data = xmalloc(bufsize); - cp = encode_delta_size(data, src_size); - cp = encode_delta_size(cp, dst_size); - - if (dst_size) { - /* copy out literally */ - while (dst_size) { - int sz = (127 < dst_size) ? 
127 : dst_size; - *cp++ = sz; - dst_size -= sz; - while (sz) { - *cp++ = *dst++; - sz--; - } + delta = NULL; + deflated = deflate_it(two->ptr, two->size, &deflate_size); + if (one->size && two->size) { + delta = diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size, deflate_size); + if (delta) { + void *to_free = delta; + orig_size = delta_size; + delta = deflate_it(delta, delta_size, &delta_size); + free(to_free); } } - *delta_size = (cp - data); - return data; -} -static void emit_binary_diff(mmfile_t *one, mmfile_t *two) -{ - void *delta, *cp; - unsigned long delta_size; + if (delta && delta_size < deflate_size) { + printf("delta %lu\n", orig_size); + free(deflated); + data = delta; + data_size = delta_size; + } + else { + printf("literal %lu\n", two->size); + free(delta); + data = deflated; + data_size = deflate_size; + } - printf("GIT binary patch\n"); - delta = safe_diff_delta(one->ptr, one->size, - two->ptr, two->size, - &delta_size); - if (!delta) - die("unable to generate binary diff"); - - /* emit delta encoded in base85 */ - cp = delta; - while (delta_size) { - int bytes = (52 < delta_size) ? 52 : delta_size; + /* emit data encoded in base85 */ + cp = data; + while (data_size) { + int bytes = (52 < data_size) ? 
52 : data_size; char line[70]; - delta_size -= bytes; + data_size -= bytes; if (bytes <= 26) line[0] = bytes + 'A' - 1; else @@ -473,7 +473,7 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two) puts(line); } printf("\n"); - free(delta); + free(data); } #define FIRST_FEW_BYTES 8000 @@ -538,7 +538,11 @@ static void builtin_diff(const char *name_a, die("unable to read files to diff"); if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { - if (o->full_index) + /* Quite common confusing case */ + if (mf1.size == mf2.size && + !memcmp(mf1.ptr, mf2.ptr, mf1.size)) + goto free_ab_and_return; + if (o->binary) emit_binary_diff(&mf1, &mf2); else printf("Binary files %s and %s differ\n", @@ -1239,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->rename_limit = strtoul(arg+2, NULL, 10); else if (!strcmp(arg, "--full-index")) options->full_index = 1; + else if (!strcmp(arg, "--binary")) { + options->output_format = DIFF_FORMAT_PATCH; + options->full_index = options->binary = 1; + } else if (!strcmp(arg, "--name-only")) options->output_format = DIFF_FORMAT_NAME; else if (!strcmp(arg, "--name-status")) -- cgit v1.2.3