From 051308f6e9cebeb76b8fb4f52b7e9e7ce064445c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 4 May 2006 16:51:44 -0700 Subject: binary patch. This adds "binary patch" to the diff output and teaches apply what to do with them. On the diff generation side, traditionally, we said "Binary files differ\n" without giving anything other than the preimage and postimage object name on the index line. This was good enough for applying a patch generated from your own repository (very useful while rebasing), because the postimage would be available in such a case. However, this was not useful when the recipient of such a patch via e-mail were to apply it, even if the preimage was available. This patch allows the diff to generate "binary" patch when operating under --full-index option. The binary patch follows the usual extended git diff headers, and looks like this: "GIT binary patch\n" <length byte><data>"\n" ... "\n" Each line is prefixed with a "length-byte", whose value is upper or lowercase alphabet that encodes the number of bytes that the data on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ..., 'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of 5-byte sequence, each of which encodes up to 4 bytes in base85 encoding. Because 52 / 4 * 5 = 65 and we have the length byte, an output line is capped to 66 characters. The payload is the same diff-delta as we use in the packfiles. On the consumption side, git-apply now can decode and apply the binary patch when --allow-binary-replacement is given, the diff was generated with --full-index, and the receiving repository has the preimage blob, which is the same condition as it always required when accepting a "Binary files differ\n" patch. 
Signed-off-by: Junio C Hamano --- apply.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 172 insertions(+), 60 deletions(-) (limited to 'apply.c') diff --git a/apply.c b/apply.c index 269210a578..e37c4ebf52 100644 --- a/apply.c +++ b/apply.c @@ -10,6 +10,7 @@ #include "cache.h" #include "quote.h" #include "blob.h" +#include "delta.h" // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch @@ -966,6 +967,70 @@ static inline int metadata_changes(struct patch *patch) patch->old_mode != patch->new_mode); } +static int parse_binary(char *buffer, unsigned long size, struct patch *patch) +{ + /* We have read "GIT binary patch\n"; what follows is a + * sequence of 'length-byte' followed by base-85 encoded + * delta data. + * + * Each 5-byte sequence of base-85 encodes up to 4 bytes, + * and we would limit the patch line to 66 characters, + * so one line can fit up to 13 groups that would decode + * to 52 bytes max. The length byte 'A'-'Z' corresponds + * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes. + * The end of binary is signalled with an empty line. + */ + int llen, used; + struct fragment *fragment; + char *delta = NULL; + + patch->is_binary = 1; + patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); + used = 0; + while (1) { + int byte_length, max_byte_length, newsize; + llen = linelen(buffer, size); + used += llen; + linenr++; + if (llen == 1) + break; + /* Minimum line is "A00000\n" which is 7-byte long, + * and the line length must be multiple of 5 plus 2. 
+ */ + if ((llen < 7) || (llen-2) % 5) + goto corrupt; + max_byte_length = (llen - 2) / 5 * 4; + byte_length = *buffer; + if ('A' <= byte_length && byte_length <= 'Z') + byte_length = byte_length - 'A' + 1; + else if ('a' <= byte_length && byte_length <= 'z') + byte_length = byte_length - 'a' + 27; + else + goto corrupt; + /* if the input length was not multiple of 4, we would + * have filler at the end but the filler should never + * exceed 3 bytes + */ + if (max_byte_length < byte_length || + byte_length <= max_byte_length - 4) + goto corrupt; + newsize = fragment->size + byte_length; + delta = xrealloc(delta, newsize); + if (decode_85(delta + fragment->size, + buffer + 1, + byte_length)) + goto corrupt; + fragment->size = newsize; + buffer += llen; + size -= llen; + } + fragment->patch = delta; + return used; + corrupt: + return error("corrupt binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); +} + static int parse_chunk(char *buffer, unsigned long size, struct patch *patch) { int hdrsize, patchsize; @@ -982,19 +1047,34 @@ static int parse_chunk(char *buffer, unsigned long size, struct patch *patch) "Files ", NULL, }; + static const char git_binary[] = "GIT binary patch\n"; int i; int hd = hdrsize + offset; unsigned long llen = linelen(buffer + hd, size - hd); - if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) + if (llen == sizeof(git_binary) - 1 && + !memcmp(git_binary, buffer + hd, llen)) { + int used; + linenr++; + used = parse_binary(buffer + hd + llen, + size - hd - llen, patch); + if (used) + patchsize = used + llen; + else + patchsize = 0; + } + else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) { for (i = 0; binhdr[i]; i++) { int len = strlen(binhdr[i]); if (len < size - hd && !memcmp(binhdr[i], buffer + hd, len)) { + linenr++; patch->is_binary = 1; + patchsize = llen; break; } } + } /* Empty patch cannot be applied if: * - it is a binary patch and we do not do binary_replace, or @@ -1345,76 +1425,108 @@ static int 
apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) return offset; } -static int apply_fragments(struct buffer_desc *desc, struct patch *patch) +static int apply_binary(struct buffer_desc *desc, struct patch *patch) { - struct fragment *frag = patch->fragments; const char *name = patch->old_name ? patch->old_name : patch->new_name; + unsigned char sha1[20]; + unsigned char hdr[50]; + int hdrlen; - if (patch->is_binary) { - unsigned char sha1[20]; + if (!allow_binary_replacement) + return error("cannot apply binary patch to '%s' " + "without --allow-binary-replacement", + name); - if (!allow_binary_replacement) - return error("cannot apply binary patch to '%s' " - "without --allow-binary-replacement", - name); + /* For safety, we require patch index line to contain + * full 40-byte textual SHA1 for old and new, at least for now. + */ + if (strlen(patch->old_sha1_prefix) != 40 || + strlen(patch->new_sha1_prefix) != 40 || + get_sha1_hex(patch->old_sha1_prefix, sha1) || + get_sha1_hex(patch->new_sha1_prefix, sha1)) + return error("cannot apply binary patch to '%s' " + "without full index line", name); - /* For safety, we require patch index line to contain - * full 40-byte textual SHA1 for old and new, at least for now. + if (patch->old_name) { + /* See if the old one matches what the patch + * applies to. */ - if (strlen(patch->old_sha1_prefix) != 40 || - strlen(patch->new_sha1_prefix) != 40 || - get_sha1_hex(patch->old_sha1_prefix, sha1) || - get_sha1_hex(patch->new_sha1_prefix, sha1)) - return error("cannot apply binary patch to '%s' " - "without full index line", name); - - if (patch->old_name) { - unsigned char hdr[50]; - int hdrlen; - - /* See if the old one matches what the patch - * applies to. 
- */ - write_sha1_file_prepare(desc->buffer, desc->size, - blob_type, sha1, hdr, &hdrlen); - if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) - return error("the patch applies to '%s' (%s), " - "which does not match the " - "current contents.", - name, sha1_to_hex(sha1)); - } - else { - /* Otherwise, the old one must be empty. */ - if (desc->size) - return error("the patch applies to an empty " - "'%s' but it is not empty", name); - } + write_sha1_file_prepare(desc->buffer, desc->size, + blob_type, sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) + return error("the patch applies to '%s' (%s), " + "which does not match the " + "current contents.", + name, sha1_to_hex(sha1)); + } + else { + /* Otherwise, the old one must be empty. */ + if (desc->size) + return error("the patch applies to an empty " + "'%s' but it is not empty", name); + } + + if (desc->buffer) { + free(desc->buffer); + desc->alloc = desc->size = 0; + } + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) + return 0; /* deletion patch */ + + if (has_sha1_file(sha1)) { + char type[10]; + unsigned long size; - /* For now, we do not record post-image data in the patch, - * and require the object already present in the recipient's - * object database. + desc->buffer = read_sha1_file(sha1, type, &size); + if (!desc->buffer) + return error("the necessary postimage %s for " + "'%s' cannot be read", + patch->new_sha1_prefix, name); + desc->alloc = desc->size = size; + } + else { + char type[10]; + unsigned long src_size, dst_size; + void *src; + + get_sha1_hex(patch->old_sha1_prefix, sha1); + src = read_sha1_file(sha1, type, &src_size); + if (!src) + return error("the necessary preimage %s for " + "'%s' cannot be read", + patch->old_sha1_prefix, name); + + /* patch->fragment->patch has the delta data and + * we should apply it to the preimage. 
*/ - if (desc->buffer) { - free(desc->buffer); - desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - - if (memcmp(sha1, null_sha1, 20)) { - char type[10]; - unsigned long size; - - desc->buffer = read_sha1_file(sha1, type, &size); - if (!desc->buffer) - return error("the necessary postimage %s for " - "'%s' does not exist", - patch->new_sha1_prefix, name); - desc->alloc = desc->size = size; - } + desc->buffer = patch_delta(src, src_size, + (void*) patch->fragments->patch, + patch->fragments->size, + &dst_size); + if (!desc->buffer) + return error("binary patch does not apply to '%s'", + name); + desc->size = desc->alloc = dst_size; - return 0; + /* verify that the result matches */ + write_sha1_file_prepare(desc->buffer, desc->size, blob_type, + sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix)) + return error("binary patch to '%s' creates incorrect result", name); } + return 0; +} + +static int apply_fragments(struct buffer_desc *desc, struct patch *patch) +{ + struct fragment *frag = patch->fragments; + const char *name = patch->old_name ? patch->old_name : patch->new_name; + + if (patch->is_binary) + return apply_binary(desc, patch); + while (frag) { if (apply_one_fragment(desc, frag) < 0) return error("patch failed: %s:%ld", -- cgit v1.2.3 From 0660626caff6ac3745cd7b7908a2ca540141a2ec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 5 May 2006 02:41:53 -0700 Subject: binary diff: further updates. This updates the user interface and generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym "apply --binary". * After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". 
The former records the deflated postimage and the latter records the deflated delta from the preimage to postimage. For purely implementation convenience, I added the deflated length after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcomed. Signed-off-by: Junio C Hamano --- apply.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 33 deletions(-) (limited to 'apply.c') diff --git a/apply.c b/apply.c index e37c4ebf52..1b93aab8af 100644 --- a/apply.c +++ b/apply.c @@ -114,6 +114,9 @@ struct patch { char *new_name, *old_name, *def_name; unsigned int old_mode, new_mode; int is_rename, is_copy, is_new, is_delete, is_binary; +#define BINARY_DELTA_DEFLATED 1 +#define BINARY_LITERAL_DEFLATED 2 + unsigned long deflate_origlen; int lines_added, lines_deleted; int score; struct fragment *fragments; @@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch) static int parse_binary(char *buffer, unsigned long size, struct patch *patch) { - /* We have read "GIT binary patch\n"; what follows is a - * sequence of 'length-byte' followed by base-85 encoded - * delta data. + /* We have read "GIT binary patch\n"; what follows is a line + * that says the patch method (currently, either "deflated + * literal" or "deflated delta") and the length of data before + * deflating; a sequence of 'length-byte' followed by base-85 + * encoded data follows. 
* * Each 5-byte sequence of base-85 encodes up to 4 bytes, * and we would limit the patch line to 66 characters, @@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) */ int llen, used; struct fragment *fragment; - char *delta = NULL; + char *data = NULL; - patch->is_binary = 1; patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); - used = 0; + + /* Grab the type of patch */ + llen = linelen(buffer, size); + used = llen; + linenr++; + + if (!strncmp(buffer, "delta ", 6)) { + patch->is_binary = BINARY_DELTA_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 6, NULL, 10); + } + else if (!strncmp(buffer, "literal ", 8)) { + patch->is_binary = BINARY_LITERAL_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 8, NULL, 10); + } + else + return error("unrecognized binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); + buffer += llen; while (1) { int byte_length, max_byte_length, newsize; llen = linelen(buffer, size); @@ -1015,8 +1036,8 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) byte_length <= max_byte_length - 4) goto corrupt; newsize = fragment->size + byte_length; - delta = xrealloc(delta, newsize); - if (decode_85(delta + fragment->size, + data = xrealloc(data, newsize); + if (decode_85(data + fragment->size, buffer + 1, byte_length)) goto corrupt; @@ -1024,7 +1045,7 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) buffer += llen; size -= llen; } - fragment->patch = delta; + fragment->patch = data; return used; corrupt: return error("corrupt binary patch at line %d: %.*s", @@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) return offset; } +static char *inflate_it(const void *data, unsigned long size, + unsigned long inflated_size) +{ + z_stream stream; + void *out; + int st; + + memset(&stream, 0, sizeof(stream)); + + stream.next_in = (unsigned char *)data; + stream.avail_in = 
size; + stream.next_out = out = xmalloc(inflated_size); + stream.avail_out = inflated_size; + inflateInit(&stream); + st = inflate(&stream, Z_FINISH); + if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { + free(out); + return NULL; + } + return out; +} + +static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch) +{ + unsigned long dst_size; + struct fragment *fragment = patch->fragments; + void *data; + void *result; + + data = inflate_it(fragment->patch, fragment->size, + patch->deflate_origlen); + if (!data) + return error("corrupt patch data"); + switch (patch->is_binary) { + case BINARY_DELTA_DEFLATED: + result = patch_delta(desc->buffer, desc->size, + data, + patch->deflate_origlen, + &dst_size); + free(desc->buffer); + desc->buffer = result; + free(data); + break; + case BINARY_LITERAL_DEFLATED: + free(desc->buffer); + desc->buffer = data; + dst_size = patch->deflate_origlen; + break; + } + if (!desc->buffer) + return -1; + desc->size = desc->alloc = dst_size; + return 0; +} + static int apply_binary(struct buffer_desc *desc, struct patch *patch) { const char *name = patch->old_name ? 
patch->old_name : patch->new_name; @@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) "'%s' but it is not empty", name); } - if (desc->buffer) { + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) { free(desc->buffer); desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - if (!memcmp(sha1, null_sha1, 20)) + desc->buffer = NULL; return 0; /* deletion patch */ + } if (has_sha1_file(sha1)) { + /* We already have the postimage */ char type[10]; unsigned long size; + free(desc->buffer); desc->buffer = read_sha1_file(sha1, type, &size); if (!desc->buffer) return error("the necessary postimage %s for " @@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) desc->alloc = desc->size = size; } else { - char type[10]; - unsigned long src_size, dst_size; - void *src; - - get_sha1_hex(patch->old_sha1_prefix, sha1); - src = read_sha1_file(sha1, type, &src_size); - if (!src) - return error("the necessary preimage %s for " - "'%s' cannot be read", - patch->old_sha1_prefix, name); - - /* patch->fragment->patch has the delta data and - * we should apply it to the preimage. + /* We have verified desc matches the preimage; + * apply the patch data to it, which is stored + * in the patch->fragments->{patch,size}. 
*/ - desc->buffer = patch_delta(src, src_size, - (void*) patch->fragments->patch, - patch->fragments->size, - &dst_size); - if (!desc->buffer) + if (apply_binary_fragment(desc, patch)) return error("binary patch does not apply to '%s'", name); - desc->size = desc->alloc = dst_size; /* verify that the result matches */ write_sha1_file_prepare(desc->buffer, desc->size, blob_type, @@ -2102,7 +2165,8 @@ int main(int argc, char **argv) diffstat = 1; continue; } - if (!strcmp(arg, "--allow-binary-replacement")) { + if (!strcmp(arg, "--allow-binary-replacement") || + !strcmp(arg, "--binary")) { allow_binary_replacement = 1; continue; } -- cgit v1.2.3