From c89fb6b19a91add28a111cf257e01cba2c8de69c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 19 Jan 2008 00:42:22 -0800 Subject: builtin-apply.c: refactor small part that matches context This moves three "if" conditions out of line from find_offset() function, which is responsible for finding the matching place in the preimage to apply the patch. There is no change in the logic of the program. Signed-off-by: Junio C Hamano --- builtin-apply.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 15432b6782..2c052f80e5 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1437,6 +1437,17 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) } } +static int match_fragment(const char *buf, unsigned long size, + unsigned long try, + const char *fragment, unsigned long fragsize) +{ + if (try + fragsize > size) + return 0; + if (memcmp(buf + try, fragment, fragsize)) + return 0; + return 1; +} + static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line, int *lines) @@ -1461,8 +1472,7 @@ static int find_offset(const char *buf, unsigned long size, } /* Exact line number? */ - if ((start + fragsize <= size) && - !memcmp(buf + start, fragment, fragsize)) + if (match_fragment(buf, size, start, fragment, fragsize)) return start; /* @@ -1494,9 +1504,7 @@ static int find_offset(const char *buf, unsigned long size, try = forwards; } - if (try + fragsize > size) - continue; - if (memcmp(buf + try, fragment, fragsize)) + if (!match_fragment(buf, size, try, fragment, fragsize)) continue; n = (i >> 1)+1; if (i & 1) -- cgit v1.2.3 From fcb77bc57b4d2f63bd4d0c2fd36498f308d28cbe Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 19 Jan 2008 02:16:16 -0800 Subject: builtin-apply.c: restructure "offset" matching This restructures code to find matching location with offset in find_offset() function, so that there is need for only one call site of match_fragment() function. There still isn't a change in the logic of the program. Signed-off-by: Junio C Hamano --- builtin-apply.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 2c052f80e5..0a304ab81a 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1452,8 +1452,8 @@ static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line, int *lines) { - int i; - unsigned long start, backwards, forwards; + int i, no_more_backwards, no_more_forwards; + unsigned long start, backwards, forwards, try; if (fragsize > size) return -1; @@ -1471,32 +1471,44 @@ static int find_offset(const char *buf, unsigned long size, } } - /* Exact line number? */ - if (match_fragment(buf, size, start, fragment, fragsize)) - return start; - /* * There's probably some smart way to do this, but I'll leave * that to the smart and beautiful people. I'm simple and stupid. */ backwards = start; forwards = start; + try = start; + for (i = 0; ; i++) { - unsigned long try; - int n; + no_more_backwards = !backwards; + no_more_forwards = (forwards + fragsize > size); + + if (match_fragment(buf, size, try, fragment, fragsize)) { + int shift = ((i+1) >> 1); + if (i & 1) + shift = -shift; + *lines = shift; + return try; + } + + again: + if (no_more_backwards && no_more_forwards) + break; - /* "backward" */ if (i & 1) { - if (!backwards) { - if (forwards + fragsize > size) - break; - continue; + if (no_more_backwards) { + i++; + goto again; } do { --backwards; } while (backwards && buf[backwards-1] != '\n'); try = backwards; } else { + if (no_more_forwards) { + i++; + goto again; + } while (forwards + fragsize <= size) { if (buf[forwards++] == '\n') break; @@ -1504,18 +1516,7 @@ static int find_offset(const char *buf, unsigned long size, try = forwards; } - if (!match_fragment(buf, size, try, fragment, fragsize)) - continue; - n = (i >> 1)+1; - if (i & 1) - n = -n; - *lines = n; - return try; } - - /* - * We should start searching forward and backward. - */ return -1; } -- cgit v1.2.3 From dc41976a3eed1d9c93fdc08c448bab969db4e0ec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 19 Jan 2008 01:58:34 -0800 Subject: builtin-apply.c: push match-beginning/end logic down This moves the logic to force match at the beginning and/or at the end of the buffer to the actual function that finds the match from its caller. This is a necessary preparation for the next step to allow matching disregarding certain differences, such as whitespace changes. We probably could optimize this even more by taking advantage of the fact that match_beginning and match_end forces the match to be at an exact location (anchored at the beginning and/or the end), but that's for another commit. Signed-off-by: Junio C Hamano --- builtin-apply.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 0a304ab81a..f84a40575e 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1439,18 +1439,36 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) static int match_fragment(const char *buf, unsigned long size, unsigned long try, - const char *fragment, unsigned long fragsize) + const char *fragment, unsigned long fragsize, + int match_beginning, int match_end) { - if (try + fragsize > size) + if (match_beginning && try) return 0; - if (memcmp(buf + try, fragment, fragsize)) - return 0; - return 1; + + /* + * Do we have an exact match? If we were told to match + * at the end, size must be exactly at try+fragsize, + * otherwise try+fragsize must be still within the preimage, + * and either case, the old piece should match the preimage + * exactly. + */ + if ((match_end + ? (try + fragsize == size) + : (try + fragsize <= size)) && + !memcmp(buf + try, fragment, fragsize)) + return 1; + + /* + * NEEDSWORK: We can optionally match fuzzily here, but + * that is for a later round. + */ + return 0; } static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, - int line, int *lines) + int line, int *lines, + int match_beginning, int match_end) { int i, no_more_backwards, no_more_forwards; unsigned long start, backwards, forwards, try; @@ -1483,7 +1501,8 @@ static int find_offset(const char *buf, unsigned long size, no_more_backwards = !backwards; no_more_forwards = (forwards + fragsize > size); - if (match_fragment(buf, size, try, fragment, fragsize)) { + if (match_fragment(buf, size, try, fragment, fragsize, + match_beginning, match_end)) { int shift = ((i+1) >> 1); if (i & 1) shift = -shift; @@ -1765,17 +1784,16 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, pos = frag->newpos; for (;;) { offset = find_offset(buf->buf, buf->len, - oldlines, oldsize, pos, &lines); - if (match_end && offset + oldsize != buf->len) - offset = -1; - if (match_beginning && offset) - offset = -1; + oldlines, oldsize, pos, &lines, + match_beginning, match_end); if (offset >= 0) { if (ws_error_action == correct_ws_error && - (buf->len - oldsize - offset == 0)) /* end of file? */ + (buf->len - oldsize - offset == 0)) + /* end of file? */ newsize -= new_blank_lines_at_end; - /* Warn if it was necessary to reduce the number + /* + * Warn if it was necessary to reduce the number * of context lines. */ if ((leading != frag->leading) || -- cgit v1.2.3 From b94f2eda99646fea71f84ccd895d94b0001d0db6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 26 Jan 2008 17:42:49 -0800 Subject: builtin-apply.c: make it more line oriented This changes the way git-apply internally works to be more line oriented. The logic to find where the patch applies with offset used to count line numbers by always counting LF from the beginning of the buffer, but it is simplified because we count the line length of the target file and the preimage snippet upfront now. The ultimate motivation is to allow applying patches whose preimage context has whitespace corruption that has already been corrected in the local copy. For that purpose, we introduce a table of line-hash that allows us to match lines that differ only in whitespaces. Signed-off-by: Junio C Hamano --- builtin-apply.c | 403 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 280 insertions(+), 123 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index f84a40575e..e046e87ad2 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -161,6 +161,92 @@ struct patch { struct patch *next; }; +/* + * A line in a file, len-bytes long (includes the terminating LF, + * except for an incomplete line at the end if the file ends with + * one), and its contents hashes to 'hash'. + */ +struct line { + size_t len; + unsigned hash : 24; + unsigned flag : 8; +}; + +/* + * This represents a "file", which is an array of "lines". + */ +struct image { + char *buf; + size_t len; + size_t nr; + struct line *line_allocated; + struct line *line; +}; + +static uint32_t hash_line(const char *cp, size_t len) +{ + size_t i; + uint32_t h; + for (i = 0, h = 0; i < len; i++) { + if (!isspace(cp[i])) { + h = h * 3 + (cp[i] & 0xff); + } + } + return h; +} + +static void prepare_image(struct image *image, char *buf, size_t len, + int prepare_linetable) +{ + const char *cp, *ep; + int n; + + image->buf = buf; + image->len = len; + + if (!prepare_linetable) { + image->line = NULL; + image->line_allocated = NULL; + image->nr = 0; + return; + } + + ep = image->buf + image->len; + + /* First count lines */ + cp = image->buf; + n = 0; + while (cp < ep) { + cp = strchrnul(cp, '\n'); + n++; + cp++; + } + + image->line_allocated = xcalloc(n, sizeof(struct line)); + image->line = image->line_allocated; + image->nr = n; + cp = image->buf; + n = 0; + while (cp < ep) { + const char *next; + for (next = cp; next < ep && *next != '\n'; next++) + ; + if (next < ep) + next++; + image->line[n].len = next - cp; + image->line[n].hash = hash_line(cp, next - cp); + cp = next; + n++; + } +} + +static void clear_image(struct image *image) +{ + free(image->buf); + image->buf = NULL; + image->len = 0; +} + static void say_patch_name(FILE *output, const char *pre, struct patch *patch, const char *post) { @@ -1437,14 +1523,29 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) } } -static int match_fragment(const char *buf, unsigned long size, +static int match_fragment(struct image *img, + struct image *preimage, + struct image *postimage, unsigned long try, - const char *fragment, unsigned long fragsize, + int try_lno, int match_beginning, int match_end) { - if (match_beginning && try) + int i; + + if (preimage->nr + try_lno > img->nr) + return 0; + + if (match_beginning && try_lno) + return 0; + + if (match_end && preimage->nr + try_lno != img->nr) return 0; + /* Quick hash check */ + for (i = 0; i < preimage->nr; i++) + if (preimage->line[i].hash != img->line[try_lno + i].hash) + return 0; + /* * Do we have an exact match? If we were told to match * at the end, size must be exactly at try+fragsize, @@ -1453,9 +1554,9 @@ static int match_fragment(const char *buf, unsigned long size, * exactly. */ if ((match_end - ? (try + fragsize == size) - : (try + fragsize <= size)) && - !memcmp(buf + try, fragment, fragsize)) + ? (try + preimage->len == img->len) + : (try + preimage->len <= img->len)) && + !memcmp(img->buf + try, preimage->buf, preimage->len)) return 1; /* @@ -1465,105 +1566,78 @@ static int match_fragment(const char *buf, unsigned long size, return 0; } -static int find_offset(const char *buf, unsigned long size, - const char *fragment, unsigned long fragsize, - int line, int *lines, - int match_beginning, int match_end) +static int find_pos(struct image *img, + struct image *preimage, + struct image *postimage, + int line, + int match_beginning, int match_end) { - int i, no_more_backwards, no_more_forwards; - unsigned long start, backwards, forwards, try; + int i; + unsigned long backwards, forwards, try; + int backwards_lno, forwards_lno, try_lno; - if (fragsize > size) + if (preimage->nr > img->nr) return -1; - start = 0; - if (line > 1) { - unsigned long offset = 0; - i = line-1; - while (offset + fragsize <= size) { - if (buf[offset++] == '\n') { - start = offset; - if (!--i) - break; - } - } - } + try = 0; + for (i = 0; i < line; i++) + try += img->line[i].len; /* * There's probably some smart way to do this, but I'll leave * that to the smart and beautiful people. I'm simple and stupid. */ - backwards = start; - forwards = start; - try = start; + backwards = try; + backwards_lno = line; + forwards = try; + forwards_lno = line; + try_lno = line; for (i = 0; ; i++) { - no_more_backwards = !backwards; - no_more_forwards = (forwards + fragsize > size); - - if (match_fragment(buf, size, try, fragment, fragsize, - match_beginning, match_end)) { - int shift = ((i+1) >> 1); - if (i & 1) - shift = -shift; - *lines = shift; - return try; - } + if (match_fragment(img, preimage, postimage, + try, try_lno, + match_beginning, match_end)) + return try_lno; again: - if (no_more_backwards && no_more_forwards) + if (backwards_lno == 0 && forwards_lno == img->nr) break; if (i & 1) { - if (no_more_backwards) { + if (backwards_lno == 0) { i++; goto again; } - do { - --backwards; - } while (backwards && buf[backwards-1] != '\n'); + backwards_lno--; + backwards -= img->line[backwards_lno].len; try = backwards; + try_lno = backwards_lno; } else { - if (no_more_forwards) { + if (forwards_lno == img->nr) { i++; goto again; } - while (forwards + fragsize <= size) { - if (buf[forwards++] == '\n') - break; - } + forwards += img->line[forwards_lno].len; + forwards_lno++; try = forwards; + try_lno = forwards_lno; } } return -1; } -static void remove_first_line(const char **rbuf, int *rsize) +static void remove_first_line(struct image *img) { - const char *buf = *rbuf; - int size = *rsize; - unsigned long offset; - offset = 0; - while (offset <= size) { - if (buf[offset++] == '\n') - break; - } - *rsize = size - offset; - *rbuf = buf + offset; + img->buf += img->line[0].len; + img->len -= img->line[0].len; + img->line++; + img->nr--; } -static void remove_last_line(const char **rbuf, int *rsize) +static void remove_last_line(struct image *img) { - const char *buf = *rbuf; - int size = *rsize; - unsigned long offset; - offset = size - 1; - while (offset > 0) { - if (buf[--offset] == '\n') - break; - } - *rsize = offset + 1; + img->len -= img->line[--img->nr].len; } static int apply_line(char *output, const char *patch, int plen, @@ -1668,19 +1742,75 @@ static int apply_line(char *output, const char *patch, int plen, return output + plen - buf; } -static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, +static void update_image(struct image *img, + int applied_pos, + struct image *preimage, + struct image *postimage) +{ + /* + * remove the copy of preimage at offset in img + * and replace it with postimage + */ + int i, nr; + size_t remove_count, insert_count, applied_at = 0; + char *result; + + for (i = 0; i < applied_pos; i++) + applied_at += img->line[i].len; + + remove_count = 0; + for (i = 0; i < preimage->nr; i++) + remove_count += img->line[applied_pos + i].len; + insert_count = postimage->len; + + /* Adjust the contents */ + result = xmalloc(img->len + insert_count - remove_count + 1); + memcpy(result, img->buf, applied_at); + memcpy(result + applied_at, postimage->buf, postimage->len); + memcpy(result + applied_at + postimage->len, + img->buf + (applied_at + remove_count), + img->len - (applied_at + remove_count)); + free(img->buf); + img->buf = result; + img->len += insert_count - remove_count; + result[img->len] = '\0'; + + /* Adjust the line table */ + nr = img->nr + postimage->nr - preimage->nr; + if (preimage->nr < postimage->nr) { + /* + * NOTE: this knows that we never call remove_first_line() + * on anything other than pre/post image. + */ + img->line = xrealloc(img->line, nr * sizeof(*img->line)); + img->line_allocated = img->line; + } + if (preimage->nr != postimage->nr) + memmove(img->line + applied_pos + postimage->nr, + img->line + applied_pos + preimage->nr, + (img->nr - (applied_pos + preimage->nr)) * + sizeof(*img->line)); + memcpy(img->line + applied_pos, + postimage->line, + postimage->nr * sizeof(*img->line)); + img->nr = nr; +} + +static int apply_one_fragment(struct image *img, struct fragment *frag, int inaccurate_eof, unsigned ws_rule) { int match_beginning, match_end; const char *patch = frag->patch; - int offset, size = frag->size; + int size = frag->size; char *old = xmalloc(size); char *new = xmalloc(size); - const char *oldlines, *newlines; + char *oldlines, *newlines; int oldsize = 0, newsize = 0; int new_blank_lines_at_end = 0; unsigned long leading, trailing; - int pos, lines; + int pos, applied_pos; + struct image preimage; + struct image postimage; while (size > 0) { char first; @@ -1780,32 +1910,16 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, match_end = !trailing; } - lines = 0; - pos = frag->newpos; + pos = frag->newpos ? (frag->newpos - 1) : 0; + prepare_image(&preimage, oldlines, oldsize, 1); + prepare_image(&postimage, newlines, newsize, 1); for (;;) { - offset = find_offset(buf->buf, buf->len, - oldlines, oldsize, pos, &lines, - match_beginning, match_end); - if (offset >= 0) { - if (ws_error_action == correct_ws_error && - (buf->len - oldsize - offset == 0)) - /* end of file? */ - newsize -= new_blank_lines_at_end; - /* - * Warn if it was necessary to reduce the number - * of context lines. - */ - if ((leading != frag->leading) || - (trailing != frag->trailing)) - fprintf(stderr, "Context reduced to (%ld/%ld)" - " to apply fragment at %d\n", - leading, trailing, pos + lines); - - strbuf_splice(buf, offset, oldsize, newlines, newsize); - offset = 0; + applied_pos = find_pos(img, &preimage, &postimage, + pos, match_beginning, match_end); + + if (applied_pos >= 0) break; - } /* Am I at my context limits? */ if ((leading <= p_context) && (trailing <= p_context)) @@ -1814,33 +1928,63 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, match_beginning = match_end = 0; continue; } + /* * Reduce the number of context lines; reduce both * leading and trailing if they are equal otherwise * just reduce the larger context. */ if (leading >= trailing) { - remove_first_line(&oldlines, &oldsize); - remove_first_line(&newlines, &newsize); + remove_first_line(&preimage); + remove_first_line(&postimage); pos--; leading--; } if (trailing > leading) { - remove_last_line(&oldlines, &oldsize); - remove_last_line(&newlines, &newsize); + remove_last_line(&preimage); + remove_last_line(&postimage); trailing--; } } - if (offset && apply_verbosely) - error("while searching for:\n%.*s", oldsize, oldlines); + if (applied_pos >= 0) { + if (ws_error_action == correct_ws_error && + new_blank_lines_at_end && + postimage.nr + applied_pos == img->nr) { + /* + * If the patch application adds blank lines + * at the end, and if the patch applies at the + * end of the image, remove those added blank + * lines. + */ + while (new_blank_lines_at_end--) + remove_last_line(&postimage); + } + + /* + * Warn if it was necessary to reduce the number + * of context lines. + */ + if ((leading != frag->leading) || + (trailing != frag->trailing)) + fprintf(stderr, "Context reduced to (%ld/%ld)" + " to apply fragment at %d\n", + leading, trailing, applied_pos+1); + update_image(img, applied_pos, &preimage, &postimage); + } else { + if (apply_verbosely) + error("while searching for:\n%.*s", oldsize, oldlines); + } free(old); free(new); - return offset; + free(preimage.line_allocated); + free(postimage.line_allocated); + + return (applied_pos < 0); } -static int apply_binary_fragment(struct strbuf *buf, struct patch *patch) +static int apply_binary_fragment(struct image *img, struct patch *patch) { struct fragment *fragment = patch->fragments; unsigned long len; @@ -1857,22 +2001,26 @@ static int apply_binary_fragment(struct strbuf *buf, struct patch *patch) } switch (fragment->binary_patch_method) { case BINARY_DELTA_DEFLATED: - dst = patch_delta(buf->buf, buf->len, fragment->patch, + dst = patch_delta(img->buf, img->len, fragment->patch, fragment->size, &len); if (!dst) return -1; - /* XXX patch_delta NUL-terminates */ - strbuf_attach(buf, dst, len, len + 1); + clear_image(img); + img->buf = dst; + img->len = len; return 0; case BINARY_LITERAL_DEFLATED: - strbuf_reset(buf); - strbuf_add(buf, fragment->patch, fragment->size); + clear_image(img); + img->len = fragment->size; + img->buf = xmalloc(img->len+1); + memcpy(img->buf, fragment->patch, img->len); + img->buf[img->len] = '\0'; return 0; } return -1; } -static int apply_binary(struct strbuf *buf, struct patch *patch) +static int apply_binary(struct image *img, struct patch *patch) { const char *name = patch->old_name ? patch->old_name : patch->new_name; unsigned char sha1[20]; @@ -1893,7 +2041,7 @@ static int apply_binary(struct strbuf *buf, struct patch *patch) * See if the old one matches what the patch * applies to. */ - hash_sha1_file(buf->buf, buf->len, blob_type, sha1); + hash_sha1_file(img->buf, img->len, blob_type, sha1); if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) return error("the patch applies to '%s' (%s), " "which does not match the " @@ -1902,14 +2050,14 @@ static int apply_binary(struct strbuf *buf, struct patch *patch) } else { /* Otherwise, the old one must be empty. */ - if (buf->len) + if (img->len) return error("the patch applies to an empty " "'%s' but it is not empty", name); } get_sha1_hex(patch->new_sha1_prefix, sha1); if (is_null_sha1(sha1)) { - strbuf_release(buf); + clear_image(img); return 0; /* deletion patch */ } @@ -1924,20 +2072,21 @@ static int apply_binary(struct strbuf *buf, struct patch *patch) return error("the necessary postimage %s for " "'%s' cannot be read", patch->new_sha1_prefix, name); - /* XXX read_sha1_file NUL-terminates */ - strbuf_attach(buf, result, size, size + 1); + clear_image(img); + img->buf = result; + img->len = size; } else { /* * We have verified buf matches the preimage; * apply the patch data to it, which is stored * in the patch->fragments->{patch,size}. */ - if (apply_binary_fragment(buf, patch)) + if (apply_binary_fragment(img, patch)) return error("binary patch does not apply to '%s'", name); /* verify that the result matches */ - hash_sha1_file(buf->buf, buf->len, blob_type, sha1); + hash_sha1_file(img->buf, img->len, blob_type, sha1); if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix)) return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)", name, patch->new_sha1_prefix, sha1_to_hex(sha1)); @@ -1946,7 +2095,7 @@ static int apply_binary(struct strbuf *buf, struct patch *patch) return 0; } -static int apply_fragments(struct strbuf *buf, struct patch *patch) +static int apply_fragments(struct image *img, struct patch *patch) { struct fragment *frag = patch->fragments; const char *name = patch->old_name ? patch->old_name : patch->new_name; @@ -1954,10 +2103,10 @@ static int apply_fragments(struct strbuf *buf, struct patch *patch) unsigned inaccurate_eof = patch->inaccurate_eof; if (patch->is_binary) - return apply_binary(buf, patch); + return apply_binary(img, patch); while (frag) { - if (apply_one_fragment(buf, frag, inaccurate_eof, ws_rule)) { + if (apply_one_fragment(img, frag, inaccurate_eof, ws_rule)) { error("patch failed: %s:%ld", name, frag->oldpos); if (!apply_with_reject) return -1; @@ -1993,6 +2142,9 @@ static int read_file_or_gitlink(struct cache_entry *ce, struct strbuf *buf) static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *ce) { struct strbuf buf; + struct image image; + size_t len; + char *img; strbuf_init(&buf, 0); if (cached) { @@ -2015,9 +2167,14 @@ static int apply_data(struct patch *patch, struct stat *st, struct cache_entry * } } - if (apply_fragments(&buf, patch) < 0) + img = strbuf_detach(&buf, &len); + prepare_image(&image, img, len, !patch->is_binary); + + if (apply_fragments(&image, patch) < 0) return -1; /* note with --reject this succeeds. */ - patch->result = strbuf_detach(&buf, &patch->resultsize); + patch->result = image.buf; + patch->resultsize = image.len; + free(image.line_allocated); if (0 < patch->is_delete && patch->resultsize) return error("removal patch leaves file contents"); -- cgit v1.2.3 From ecf4c2ec6ba6dc39e72c6b37f2a238e28fec2dc1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 28 Jan 2008 03:04:30 -0800 Subject: builtin-apply.c: optimize match_beginning/end processing a bit. Wnen the caller knows the hunk needs to match at the beginning or at the end, there is no point starting from the line number that is found in the patch and trying match with increasing offset. The logic to find matching lines was made more line oriented with the previous patch and this optimization is now trivial. Signed-off-by: Junio C Hamano --- builtin-apply.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index e046e87ad2..dc650f16f6 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1579,6 +1579,16 @@ static int find_pos(struct image *img, if (preimage->nr > img->nr) return -1; + /* + * If match_begining or match_end is specified, there is no + * point starting from a wrong line that will never match and + * wander around and wait for a match at the specified end. + */ + if (match_beginning) + line = 0; + else if (match_end) + line = img->nr - preimage->nr; + try = 0; for (i = 0; i < line; i++) try += img->line[i].len; -- cgit v1.2.3 From c330fdd42dc57127272534cd2a8dd9569334b219 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 29 Jan 2008 00:17:55 -0800 Subject: builtin-apply.c: mark common context lines in lineinfo structure. This updates the way preimage and postimage in a patch hunk is parsed and prepared for applying. By looking at image->line[n].flag, the code can tell if it is a common context line that is the same between the preimage and the postimage. This matters when we actually start applying a patch with contexts that have whitespace breakages that have already been fixed in the target file. --- builtin-apply.c | 57 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 25 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index dc650f16f6..acd84f988a 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -170,6 +170,7 @@ struct line { size_t len; unsigned hash : 24; unsigned flag : 8; +#define LINE_COMMON 1 }; /* @@ -179,6 +180,7 @@ struct image { char *buf; size_t len; size_t nr; + size_t alloc; struct line *line_allocated; struct line *line; }; @@ -195,49 +197,39 @@ static uint32_t hash_line(const char *cp, size_t len) return h; } +static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag) +{ + ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc); + img->line_allocated[img->nr].len = len; + img->line_allocated[img->nr].hash = hash_line(bol, len); + img->line_allocated[img->nr].flag = flag; + img->nr++; +} + static void prepare_image(struct image *image, char *buf, size_t len, int prepare_linetable) { const char *cp, *ep; - int n; + memset(image, 0, sizeof(*image)); image->buf = buf; image->len = len; - if (!prepare_linetable) { - image->line = NULL; - image->line_allocated = NULL; - image->nr = 0; + if (!prepare_linetable) return; - } ep = image->buf + image->len; - - /* First count lines */ cp = image->buf; - n = 0; - while (cp < ep) { - cp = strchrnul(cp, '\n'); - n++; - cp++; - } - - image->line_allocated = xcalloc(n, sizeof(struct line)); - image->line = image->line_allocated; - image->nr = n; - cp = image->buf; - n = 0; while (cp < ep) { const char *next; for (next = cp; next < ep && *next != '\n'; next++) ; if (next < ep) next++; - image->line[n].len = next - cp; - image->line[n].hash = hash_line(cp, next - cp); + add_line_info(image, cp, next - cp, 0); cp = next; - n++; } + image->line = image->line_allocated; } static void clear_image(struct image *image) @@ -1822,6 +1814,9 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, struct image preimage; struct image postimage; + memset(&preimage, 0, sizeof(preimage)); + memset(&postimage, 0, sizeof(postimage)); + while (size > 0) { char first; int len = linelen(patch, size); @@ -1857,10 +1852,14 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, break; old[oldsize++] = '\n'; new[newsize++] = '\n'; + add_line_info(&preimage, "\n", 1, LINE_COMMON); + add_line_info(&postimage, "\n", 1, LINE_COMMON); break; case ' ': case '-': memcpy(old + oldsize, patch + 1, plen); + add_line_info(&preimage, old + oldsize, plen, + (first == ' ' ? LINE_COMMON : 0)); oldsize += plen; if (first == '-') break; @@ -1869,6 +1868,9 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, if (first != '+' || !no_add) { int added = apply_line(new + newsize, patch, plen, ws_rule); + add_line_info(&postimage, new + newsize, added, + (first == '+' ? 0 : LINE_COMMON)); + newsize += added; if (first == '+' && added == 1 && new[newsize-1] == '\n') @@ -1921,8 +1923,13 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, } pos = frag->newpos ? (frag->newpos - 1) : 0; - prepare_image(&preimage, oldlines, oldsize, 1); - prepare_image(&postimage, newlines, newsize, 1); + preimage.buf = old; + preimage.len = oldsize; + postimage.buf = new; + postimage.len = newsize; + preimage.line = preimage.line_allocated; + postimage.line = postimage.line_allocated; + for (;;) { applied_pos = find_pos(img, &preimage, &postimage, -- cgit v1.2.3 From 61e08ccacbfdc6046ccabdfdb01f7755ed5ad8b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 13:12:25 -0800 Subject: builtin-apply.c: clean-up apply_one_fragment() We had two pointer variables pointing to the same buffer and an integer variable used to index into its tail part that was active (old, oldlines and oldsize for the preimage, and their 'new' counterparts for the postimage). To help readability, use 'oldlines' as the allocated pointer, and use 'old' as the pointer to the tail that advances while the code builds up the contents in the buffer. The size 'oldsize' can be computed as (old-oldines). Signed-off-by: Junio C Hamano --- builtin-apply.c | 55 +++++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index acd84f988a..7fb330541f 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1804,10 +1804,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, int match_beginning, match_end; const char *patch = frag->patch; int size = frag->size; - char *old = xmalloc(size); - char *new = xmalloc(size); - char *oldlines, *newlines; - int oldsize = 0, newsize = 0; + char *old, *new, *oldlines, *newlines; int new_blank_lines_at_end = 0; unsigned long leading, trailing; int pos, applied_pos; @@ -1816,7 +1813,11 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, memset(&preimage, 0, sizeof(preimage)); memset(&postimage, 0, sizeof(postimage)); + oldlines = xmalloc(size); + newlines = xmalloc(size); + old = oldlines; + new = newlines; while (size > 0) { char first; int len = linelen(patch, size); @@ -1833,7 +1834,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, * followed by "\ No newline", then we also remove the * last one (which is the newline, of course). */ - plen = len-1; + plen = len - 1; if (len < size && patch[len] == '\\') plen--; first = *patch; @@ -1850,30 +1851,30 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, if (plen < 0) /* ... followed by '\No newline'; nothing */ break; - old[oldsize++] = '\n'; - new[newsize++] = '\n'; + *old++ = '\n'; + *new++ = '\n'; add_line_info(&preimage, "\n", 1, LINE_COMMON); add_line_info(&postimage, "\n", 1, LINE_COMMON); break; case ' ': case '-': - memcpy(old + oldsize, patch + 1, plen); - add_line_info(&preimage, old + oldsize, plen, + memcpy(old, patch + 1, plen); + add_line_info(&preimage, old, plen, (first == ' ' ? LINE_COMMON : 0)); - oldsize += plen; + old += plen; if (first == '-') break; /* Fall-through for ' ' */ case '+': if (first != '+' || !no_add) { - int added = apply_line(new + newsize, patch, + int added = apply_line(new, patch, plen, ws_rule); - add_line_info(&postimage, new + newsize, added, + add_line_info(&postimage, new, added, (first == '+' ? 0 : LINE_COMMON)); - newsize += added; + new += added; if (first == '+' && - added == 1 && new[newsize-1] == '\n') + added == 1 && new[-1] == '\n') added_blank_line = 1; } break; @@ -1892,16 +1893,13 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, patch += len; size -= len; } - if (inaccurate_eof && - oldsize > 0 && old[oldsize - 1] == '\n' && - newsize > 0 && new[newsize - 1] == '\n') { - oldsize--; - newsize--; + old > oldlines && old[-1] == '\n' && + new > newlines && new[-1] == '\n') { + old--; + new--; } - oldlines = old; - newlines = new; leading = frag->leading; trailing = frag->trailing; @@ -1923,10 +1921,10 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, } pos = frag->newpos ? (frag->newpos - 1) : 0; - preimage.buf = old; - preimage.len = oldsize; - postimage.buf = new; - postimage.len = newsize; + preimage.buf = oldlines; + preimage.len = old - oldlines; + postimage.buf = newlines; + postimage.len = new - newlines; preimage.line = preimage.line_allocated; postimage.line = postimage.line_allocated; @@ -1990,11 +1988,12 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, update_image(img, applied_pos, &preimage, &postimage); } else { if (apply_verbosely) - error("while searching for:\n%.*s", oldsize, oldlines); + error("while searching for:\n%.*s", + (int)(old - oldlines), oldlines); } - free(old); - free(new); + free(oldlines); + free(newlines); free(preimage.line_allocated); free(postimage.line_allocated); -- cgit v1.2.3 From 8441a9a84278718e9bfd49112dcc55136cea394f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 13:19:58 -0800 Subject: builtin-apply.c: simplify calling site to apply_line() The function apply_line() changed its behaviour depending on the ws_error_action, whitespace_error and if the input was a context. Make its caller responsible for such checking so that we can convert the function to copy the contents of line while fixing whitespace breakage more easily. Signed-off-by: Junio C Hamano --- builtin-apply.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 7fb330541f..d0d008fde1 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1642,7 +1642,7 @@ static void remove_last_line(struct image *img) img->len -= img->line[--img->nr].len; } -static int apply_line(char *output, const char *patch, int plen, +static int copy_wsfix(char *output, const char *patch, int plen, unsigned ws_rule) { /* @@ -1659,12 +1659,6 @@ static int apply_line(char *output, const char *patch, int plen, int need_fix_leading_space = 0; char *buf; - if ((ws_error_action != correct_ws_error) || !whitespace_error || - *patch != '+') { - memcpy(output, patch + 1, plen); - return plen; - } - /* * Strip trailing whitespace */ @@ -1821,7 +1815,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, while (size > 0) { char first; int len = linelen(patch, size); - int plen; + int plen, added; int added_blank_line = 0; if (!len) @@ -1866,17 +1860,25 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, break; /* Fall-through for ' ' */ case '+': - if (first != '+' || !no_add) { - int added = apply_line(new, patch, - plen, ws_rule); - add_line_info(&postimage, new, added, - (first == '+' ? 0 : LINE_COMMON)); - - new += added; - if (first == '+' && - added == 1 && new[-1] == '\n') - added_blank_line = 1; + /* --no-add does not add new lines */ + if (first == '+' && no_add) + break; + + if (first != '+' || + !whitespace_error || + ws_error_action != correct_ws_error) { + memcpy(new, patch + 1, plen); + added = plen; + } + else { + added = copy_wsfix(new, patch, plen, ws_rule); } + add_line_info(&postimage, new, added, + (first == '+' ? 0 : LINE_COMMON)); + new += added; + if (first == '+' && + added == 1 && new[-1] == '\n') + added_blank_line = 1; break; case '@': case '\\': /* Ignore it, we already handled it */ -- cgit v1.2.3 From 42ab241cfacdcbae910d51c06fa2870cc94b0af3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 14:27:50 -0800 Subject: builtin-apply.c: do not feed copy_wsfix() leading '+' The "patch" parameter used to include leading '+' of an added line in the patch, and the array was treated as 1-based. Make it accept the contents of the line alone and simplify the code. Signed-off-by: Junio C Hamano --- builtin-apply.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index d0d008fde1..0bc33bdd52 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1646,16 +1646,15 @@ static int copy_wsfix(char *output, const char *patch, int plen, unsigned ws_rule) { /* - * plen is number of bytes to be copied from patch, - * starting at patch+1 (patch[0] is '+'). Typically - * patch[plen] is '\n', unless this is the incomplete - * last line. + * plen is number of bytes to be copied from patch, starting + * at patch. Typically patch[plen-1] is '\n', unless this is + * the incomplete last line. */ int i; int add_nl_to_tail = 0; int fixed = 0; - int last_tab_in_indent = 0; - int last_space_in_indent = 0; + int last_tab_in_indent = -1; + int last_space_in_indent = -1; int need_fix_leading_space = 0; char *buf; @@ -1663,11 +1662,11 @@ static int copy_wsfix(char *output, const char *patch, int plen, * Strip trailing whitespace */ if ((ws_rule & WS_TRAILING_SPACE) && - (1 < plen && isspace(patch[plen-1]))) { - if (patch[plen] == '\n') + (2 < plen && isspace(patch[plen-2]))) { + if (patch[plen-1] == '\n') add_nl_to_tail = 1; plen--; - while (0 < plen && isspace(patch[plen])) + while (0 < plen && isspace(patch[plen-1])) plen--; fixed = 1; } @@ -1675,25 +1674,25 @@ static int copy_wsfix(char *output, const char *patch, int plen, /* * Check leading whitespaces (indent) */ - for (i = 1; i < plen; i++) { + for (i = 0; i < plen; i++) { char ch = patch[i]; if (ch == '\t') { last_tab_in_indent = i; if ((ws_rule & WS_SPACE_BEFORE_TAB) && - 0 < last_space_in_indent) + 0 <= last_space_in_indent) need_fix_leading_space = 1; } else if (ch == ' ') { last_space_in_indent = i; if ((ws_rule & WS_INDENT_WITH_NON_TAB) && 8 <= i - last_tab_in_indent) need_fix_leading_space = 1; - } - else + } else break; } buf = output; if (need_fix_leading_space) { + /* Process indent ourselves */ int consecutive_spaces = 0; int last = last_tab_in_indent + 1; @@ -1706,10 +1705,10 @@ static int copy_wsfix(char *output, const char *patch, int plen, } /* - * between patch[1..last], strip the funny spaces, + * between patch[0..last-1], strip the funny spaces, * updating them to tab as needed. */ - for (i = 1; i < last; i++, plen--) { + for (i = 0; i < last; i++) { char ch = patch[i]; if (ch != ' ') { consecutive_spaces = 0; @@ -1724,13 +1723,12 @@ static int copy_wsfix(char *output, const char *patch, int plen, } while (0 < consecutive_spaces--) *output++ = ' '; + plen -= last; + patch += last; fixed = 1; - i = last; } - else - i = 1; - memcpy(output, patch + i, plen); + memcpy(output, patch, plen); if (add_nl_to_tail) output[plen++] = '\n'; if (fixed) @@ -1871,7 +1869,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, added = plen; } else { - added = copy_wsfix(new, patch, plen, ws_rule); + added = copy_wsfix(new, patch + 1, plen, ws_rule); } add_line_info(&postimage, new, added, (first == '+' ? 0 : LINE_COMMON)); -- cgit v1.2.3 From ee810b715912347775fa06a0d5b16092b4908d66 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 15:11:23 -0800 Subject: builtin-apply.c: move copy_wsfix() function a bit higher. I'll be calling this from match_fragment() in later rounds. Signed-off-by: Junio C Hamano --- builtin-apply.c | 188 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 94 insertions(+), 94 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 0bc33bdd52..2af625a435 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1515,6 +1515,100 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) } } +static int copy_wsfix(char *output, const char *patch, int plen, + unsigned ws_rule) +{ + /* + * plen is number of bytes to be copied from patch, starting + * at patch. Typically patch[plen-1] is '\n', unless this is + * the incomplete last line. + */ + int i; + int add_nl_to_tail = 0; + int fixed = 0; + int last_tab_in_indent = -1; + int last_space_in_indent = -1; + int need_fix_leading_space = 0; + char *buf; + + /* + * Strip trailing whitespace + */ + if ((ws_rule & WS_TRAILING_SPACE) && + (2 < plen && isspace(patch[plen-2]))) { + if (patch[plen-1] == '\n') + add_nl_to_tail = 1; + plen--; + while (0 < plen && isspace(patch[plen-1])) + plen--; + fixed = 1; + } + + /* + * Check leading whitespaces (indent) + */ + for (i = 0; i < plen; i++) { + char ch = patch[i]; + if (ch == '\t') { + last_tab_in_indent = i; + if ((ws_rule & WS_SPACE_BEFORE_TAB) && + 0 <= last_space_in_indent) + need_fix_leading_space = 1; + } else if (ch == ' ') { + last_space_in_indent = i; + if ((ws_rule & WS_INDENT_WITH_NON_TAB) && + 8 <= i - last_tab_in_indent) + need_fix_leading_space = 1; + } else + break; + } + + buf = output; + if (need_fix_leading_space) { + /* Process indent ourselves */ + int consecutive_spaces = 0; + int last = last_tab_in_indent + 1; + + if (ws_rule & WS_INDENT_WITH_NON_TAB) { + /* have "last" point at one past the indent */ + if (last_tab_in_indent < last_space_in_indent) + last = last_space_in_indent + 1; + else + last = last_tab_in_indent + 1; + } + + /* + * between patch[0..last-1], strip the funny spaces, + * updating them to tab as needed. + */ + for (i = 0; i < last; i++) { + char ch = patch[i]; + if (ch != ' ') { + consecutive_spaces = 0; + *output++ = ch; + } else { + consecutive_spaces++; + if (consecutive_spaces == 8) { + *output++ = '\t'; + consecutive_spaces = 0; + } + } + } + while (0 < consecutive_spaces--) + *output++ = ' '; + plen -= last; + patch += last; + fixed = 1; + } + + memcpy(output, patch, plen); + if (add_nl_to_tail) + output[plen++] = '\n'; + if (fixed) + applied_after_fixing_ws++; + return output + plen - buf; +} + static int match_fragment(struct image *img, struct image *preimage, struct image *postimage, @@ -1642,100 +1736,6 @@ static void remove_last_line(struct image *img) img->len -= img->line[--img->nr].len; } -static int copy_wsfix(char *output, const char *patch, int plen, - unsigned ws_rule) -{ - /* - * plen is number of bytes to be copied from patch, starting - * at patch. Typically patch[plen-1] is '\n', unless this is - * the incomplete last line. - */ - int i; - int add_nl_to_tail = 0; - int fixed = 0; - int last_tab_in_indent = -1; - int last_space_in_indent = -1; - int need_fix_leading_space = 0; - char *buf; - - /* - * Strip trailing whitespace - */ - if ((ws_rule & WS_TRAILING_SPACE) && - (2 < plen && isspace(patch[plen-2]))) { - if (patch[plen-1] == '\n') - add_nl_to_tail = 1; - plen--; - while (0 < plen && isspace(patch[plen-1])) - plen--; - fixed = 1; - } - - /* - * Check leading whitespaces (indent) - */ - for (i = 0; i < plen; i++) { - char ch = patch[i]; - if (ch == '\t') { - last_tab_in_indent = i; - if ((ws_rule & WS_SPACE_BEFORE_TAB) && - 0 <= last_space_in_indent) - need_fix_leading_space = 1; - } else if (ch == ' ') { - last_space_in_indent = i; - if ((ws_rule & WS_INDENT_WITH_NON_TAB) && - 8 <= i - last_tab_in_indent) - need_fix_leading_space = 1; - } else - break; - } - - buf = output; - if (need_fix_leading_space) { - /* Process indent ourselves */ - int consecutive_spaces = 0; - int last = last_tab_in_indent + 1; - - if (ws_rule & WS_INDENT_WITH_NON_TAB) { - /* have "last" point at one past the indent */ - if (last_tab_in_indent < last_space_in_indent) - last = last_space_in_indent + 1; - else - last = last_tab_in_indent + 1; - } - - /* - * between patch[0..last-1], strip the funny spaces, - * updating them to tab as needed. - */ - for (i = 0; i < last; i++) { - char ch = patch[i]; - if (ch != ' ') { - consecutive_spaces = 0; - *output++ = ch; - } else { - consecutive_spaces++; - if (consecutive_spaces == 8) { - *output++ = '\t'; - consecutive_spaces = 0; - } - } - } - while (0 < consecutive_spaces--) - *output++ = ' '; - plen -= last; - patch += last; - fixed = 1; - } - - memcpy(output, patch, plen); - if (add_nl_to_tail) - output[plen++] = '\n'; - if (fixed) - applied_after_fixing_ws++; - return output + plen - buf; -} - static void update_image(struct image *img, int applied_pos, struct image *preimage, -- cgit v1.2.3 From c607aaa2f05b4dc38a6573f44b6f71db05dd8b39 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 15:13:37 -0800 Subject: builtin-apply.c: pass ws_rule down to match_fragment() This is necessary to allow match_fragment() to attempt a match with a preimage that is based on a version before whitespace errors were fixed. Signed-off-by: Junio C Hamano --- builtin-apply.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 2af625a435..5f3c047f22 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1614,6 +1614,7 @@ static int match_fragment(struct image *img, struct image *postimage, unsigned long try, int try_lno, + unsigned ws_rule, int match_beginning, int match_end) { int i; @@ -1656,6 +1657,7 @@ static int find_pos(struct image *img, struct image *preimage, struct image *postimage, int line, + unsigned ws_rule, int match_beginning, int match_end) { int i; @@ -1691,7 +1693,7 @@ static int find_pos(struct image *img, for (i = 0; ; i++) { if (match_fragment(img, preimage, postimage, - try, try_lno, + try, try_lno, ws_rule, match_beginning, match_end)) return try_lno; @@ -1930,8 +1932,8 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, for (;;) { - applied_pos = find_pos(img, &preimage, &postimage, - pos, match_beginning, match_end); + applied_pos = find_pos(img, &preimage, &postimage, pos, + ws_rule, match_beginning, match_end); if (applied_pos >= 0) break; -- cgit v1.2.3 From c1beba5b479a39143ebef96ba10103bbd9a70089 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 30 Jan 2008 15:24:34 -0800 Subject: git-apply --whitespace=fix: fix whitespace fuzz introduced by previous run When you have more than one patch series, an earlier one of which tries to introduce whitespace breakages and a later one of which has such a new line in its context, "git-apply --whitespace=fix" will apply and fix the whitespace breakages in the earlier one, making the resulting file not to match the context of the later patch. A short demonstration is in the new test, t4125. For example, suppose the first patch is: diff a/hello.txt b/hello.txt --- a/hello.txt +++ b/hello.txt @@ -20,3 +20,3 @@ Hello world.$ -How Are you$ -Today?$ +How are you $ +today? $ to fix broken case in the string, but it introduces unwanted trailing whitespaces to the result (pretend you are looking at "cat -e" output of the patch --- '$' signs are not in the patch but are shown to make the EOL stand out). And the second patch is to change the wording of the greeting further: diff a/hello.txt b/hello.txt --- a/hello.txt +++ b/hello.txt @@ -18,5 +18,5 @@ Greetings $ -Hello world.$ +Hello, everybody. $ How are you $ -today? $ +these days? $ If you apply the first one with --whitespace=fix, you will get this as the result: Hello world.$ How are you$ today?$ and this does not match the preimage of the second patch, which demands extra whitespace after "How are you" and "today?". This series is about teaching "git apply --whitespace=fix" to cope with this situation better. If the patch does not apply, it rewrites the second patch like this and retries: diff a/hello.txt b/hello.txt --- a/hello.txt +++ b/hello.txt @@ -18,5 +18,5 @@ Greetings$ -Hello world.$ +Hello, everybody.$ How are you$ -today?$ +these days?$ This is done by rewriting the preimage lines in the hunk (i.e. the lines that begin with ' ' or '-'), using the same whitespace fixing rules as it is using to apply the patches, so that it can notice what it did to the previous ones in the series. A careful reader may notice that the first patch in the example did not touch the "Greetings" line, so the trailing whitespace that is in the original preimage of the second patch is not from the series. Is rewriting this context line a problem? If you think about it, you will realize that the reason for the difference is because the submitter's tree was based on an earlier version of the file that had whitespaces wrong on that "Greetings" line, and the change that introduced the "Greetings" line was added independently of this two-patch series to our tree already with an earlier "git apply --whitespace=fix". So it may appear this logic is rewriting too much, it is not so. It is just rewriting what we would have rewritten in the past. Signed-off-by: Junio C Hamano --- builtin-apply.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 128 insertions(+), 5 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 5f3c047f22..fccf4a40cc 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1516,7 +1516,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) } static int copy_wsfix(char *output, const char *patch, int plen, - unsigned ws_rule) + unsigned ws_rule, int count_error) { /* * plen is number of bytes to be copied from patch, starting @@ -1604,11 +1604,74 @@ static int copy_wsfix(char *output, const char *patch, int plen, memcpy(output, patch, plen); if (add_nl_to_tail) output[plen++] = '\n'; - if (fixed) + if (fixed && count_error) applied_after_fixing_ws++; return output + plen - buf; } +static void update_pre_post_images(struct image *preimage, + struct image *postimage, + char *buf, + size_t len) +{ + int i, ctx; + char *new, *old, *fixed; + struct image fixed_preimage; + + /* + * Update the preimage with whitespace fixes. Note that we + * are not losing preimage->buf -- apply_one_fragment() will + * free "oldlines". + */ + prepare_image(&fixed_preimage, buf, len, 1); + assert(fixed_preimage.nr == preimage->nr); + for (i = 0; i < preimage->nr; i++) + fixed_preimage.line[i].flag = preimage->line[i].flag; + free(preimage->line_allocated); + *preimage = fixed_preimage; + + /* + * Adjust the common context lines in postimage, in place. + * This is possible because whitespace fixing does not make + * the string grow. + */ + new = old = postimage->buf; + fixed = preimage->buf; + for (i = ctx = 0; i < postimage->nr; i++) { + size_t len = postimage->line[i].len; + if (!(postimage->line[i].flag & LINE_COMMON)) { + /* an added line -- no counterparts in preimage */ + memmove(new, old, len); + old += len; + new += len; + continue; + } + + /* a common context -- skip it in the original postimage */ + old += len; + + /* and find the corresponding one in the fixed preimage */ + while (ctx < preimage->nr && + !(preimage->line[ctx].flag & LINE_COMMON)) { + fixed += preimage->line[ctx].len; + ctx++; + } + if (preimage->nr <= ctx) + die("oops"); + + /* and copy it in, while fixing the line length */ + len = preimage->line[ctx].len; + memcpy(new, fixed, len); + new += len; + fixed += len; + postimage->line[i].len = len; + ctx++; + } + + /* Fix the length of the whole thing */ + postimage->len = new - postimage->buf; +} + static int match_fragment(struct image *img, struct image *preimage, struct image *postimage, @@ -1618,6 +1681,7 @@ static int match_fragment(struct image *img, int match_beginning, int match_end) { int i; + char *fixed_buf, *buf, *orig, *target; if (preimage->nr + try_lno > img->nr) return 0; @@ -1646,10 +1710,68 @@ static int match_fragment(struct image *img, !memcmp(img->buf + try, preimage->buf, preimage->len)) return 1; + if (ws_error_action != correct_ws_error) + return 0; + + /* + * The hunk does not apply byte-by-byte, but the hash says + * it might with whitespace fuzz. + */ + fixed_buf = xmalloc(preimage->len + 1); + buf = fixed_buf; + orig = preimage->buf; + target = img->buf + try; + for (i = 0; i < preimage->nr; i++) { + size_t fixlen; /* length after fixing the preimage */ + size_t oldlen = preimage->line[i].len; + size_t tgtlen = img->line[try_lno + i].len; + size_t tgtfixlen; /* length after fixing the target line */ + char tgtfixbuf[1024], *tgtfix; + int match; + + /* Try fixing the line in the preimage */ + fixlen = copy_wsfix(buf, orig, oldlen, ws_rule, 0); + + /* Try fixing the line in the target */ + if (sizeof(tgtfixbuf) < tgtlen) + tgtfix = tgtfixbuf; + else + tgtfix = xmalloc(tgtlen); + tgtfixlen = copy_wsfix(tgtfix, target, tgtlen, ws_rule, 0); + + /* + * If they match, either the preimage was based on + * a version before our tree fixed whitespace breakage, + * or we are lacking a whitespace-fix patch the tree + * the preimage was based on already had (i.e. target + * has whitespace breakage, the preimage doesn't). + * In either case, we are fixing the whitespace breakages + * so we might as well take the fix together with their + * real change. + */ + match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen)); + + if (tgtfix != tgtfixbuf) + free(tgtfix); + if (!match) + goto unmatch_exit; + + orig += oldlen; + buf += fixlen; + target += tgtlen; + } + /* - * NEEDSWORK: We can optionally match fuzzily here, but - * that is for a later round. + * Yes, the preimage is based on an older version that still + * has whitespace breakages unfixed, and fixing them makes the + * hunk match. Update the context lines in the postimage. */ + update_pre_post_images(preimage, postimage, + fixed_buf, buf - fixed_buf); + return 1; + + unmatch_exit: + free(fixed_buf); return 0; } @@ -1871,7 +1993,8 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, added = plen; } else { - added = copy_wsfix(new, patch + 1, plen, ws_rule); + added = copy_wsfix(new, patch + 1, plen, + ws_rule, 1); } add_line_info(&postimage, new, added, (first == '+' ? 0 : LINE_COMMON)); -- cgit v1.2.3 From b2979ff599a6bcf9dbf5e2ef1e32b81a1b88e115 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 15 Jan 2008 00:59:05 -0800 Subject: core.whitespace: cr-at-eol This new error mode allows a line to have a carriage return at the end of the line when checking and fixing trailing whitespace errors. Some people like to keep CRLF line ending recorded in the repository, and still want to take advantage of the automated trailing whitespace stripping. We still show ^M in the diff output piped to "less" to remind them that they do have the CR at the end, but these carriage return characters at the end are no longer flagged as errors. Signed-off-by: Junio C Hamano --- builtin-apply.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index fccf4a40cc..2b8ba81d81 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1525,6 +1525,7 @@ static int copy_wsfix(char *output, const char *patch, int plen, */ int i; int add_nl_to_tail = 0; + int add_cr_to_tail = 0; int fixed = 0; int last_tab_in_indent = -1; int last_space_in_indent = -1; @@ -1536,12 +1537,19 @@ static int copy_wsfix(char *output, const char *patch, int plen, */ if ((ws_rule & WS_TRAILING_SPACE) && (2 < plen && isspace(patch[plen-2]))) { - if (patch[plen-1] == '\n') + if (patch[plen - 1] == '\n') { add_nl_to_tail = 1; - plen--; - while (0 < plen && isspace(patch[plen-1])) plen--; - fixed = 1; + if (1 < plen && patch[plen - 1] == '\r') { + add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL); + plen--; + } + } + if (0 < plen && isspace(patch[plen - 1])) { + while (0 < plen && isspace(patch[plen-1])) + plen--; + fixed = 1; + } } /* @@ -1602,6 +1610,8 @@ static int copy_wsfix(char *output, const char *patch, int plen, } memcpy(output, patch, plen); + if (add_cr_to_tail) + output[plen++] = '\r'; if (add_nl_to_tail) output[plen++] = '\n'; if (fixed && count_error) -- cgit v1.2.3 From 52f3c81a9d41af019ab8f05051c5251f078b12e5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 11 Feb 2008 15:32:29 -0800 Subject: apply: do not barf on patch with too large an offset Previously a patch that records too large a line number caused the offset matching code in git-apply to overstep its internal buffer. Noticed by Johannes Schindelin. Signed-off-by: Junio C Hamano --- builtin-apply.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 2b8ba81d81..5ed4e918c0 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1809,6 +1809,9 @@ static int find_pos(struct image *img, else if (match_end) line = img->nr - preimage->nr; + if (line > img->nr) + line = img->nr; + try = 0; for (i = 0; i < line; i++) try += img->line[i].len; -- cgit v1.2.3 From fe3403c3205de44faa926a086b4b99f157fc63fe Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 23 Feb 2008 16:59:16 -0800 Subject: ws_fix_copy(): move the whitespace fixing function to ws.c This is used by git-apply but we can use it elsewhere by slightly generalizing it. Signed-off-by: Junio C Hamano --- builtin-apply.c | 111 ++------------------------------------------------------ 1 file changed, 3 insertions(+), 108 deletions(-) (limited to 'builtin-apply.c') diff --git a/builtin-apply.c b/builtin-apply.c index 5ed4e918c0..64471a27e7 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1515,110 +1515,6 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) } } -static int copy_wsfix(char *output, const char *patch, int plen, - unsigned ws_rule, int count_error) -{ - /* - * plen is number of bytes to be copied from patch, starting - * at patch. Typically patch[plen-1] is '\n', unless this is - * the incomplete last line. - */ - int i; - int add_nl_to_tail = 0; - int add_cr_to_tail = 0; - int fixed = 0; - int last_tab_in_indent = -1; - int last_space_in_indent = -1; - int need_fix_leading_space = 0; - char *buf; - - /* - * Strip trailing whitespace - */ - if ((ws_rule & WS_TRAILING_SPACE) && - (2 < plen && isspace(patch[plen-2]))) { - if (patch[plen - 1] == '\n') { - add_nl_to_tail = 1; - plen--; - if (1 < plen && patch[plen - 1] == '\r') { - add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL); - plen--; - } - } - if (0 < plen && isspace(patch[plen - 1])) { - while (0 < plen && isspace(patch[plen-1])) - plen--; - fixed = 1; - } - } - - /* - * Check leading whitespaces (indent) - */ - for (i = 0; i < plen; i++) { - char ch = patch[i]; - if (ch == '\t') { - last_tab_in_indent = i; - if ((ws_rule & WS_SPACE_BEFORE_TAB) && - 0 <= last_space_in_indent) - need_fix_leading_space = 1; - } else if (ch == ' ') { - last_space_in_indent = i; - if ((ws_rule & WS_INDENT_WITH_NON_TAB) && - 8 <= i - last_tab_in_indent) - need_fix_leading_space = 1; - } else - break; - } - - buf = output; - if (need_fix_leading_space) { - /* Process indent ourselves */ - int consecutive_spaces = 0; - int last = last_tab_in_indent + 1; - - if (ws_rule & WS_INDENT_WITH_NON_TAB) { - /* have "last" point at one past the indent */ - if (last_tab_in_indent < last_space_in_indent) - last = last_space_in_indent + 1; - else - last = last_tab_in_indent + 1; - } - - /* - * between patch[0..last-1], strip the funny spaces, - * updating them to tab as needed. - */ - for (i = 0; i < last; i++) { - char ch = patch[i]; - if (ch != ' ') { - consecutive_spaces = 0; - *output++ = ch; - } else { - consecutive_spaces++; - if (consecutive_spaces == 8) { - *output++ = '\t'; - consecutive_spaces = 0; - } - } - } - while (0 < consecutive_spaces--) - *output++ = ' '; - plen -= last; - patch += last; - fixed = 1; - } - - memcpy(output, patch, plen); - if (add_cr_to_tail) - output[plen++] = '\r'; - if (add_nl_to_tail) - output[plen++] = '\n'; - if (fixed && count_error) - applied_after_fixing_ws++; - return output + plen - buf; -} - static void update_pre_post_images(struct image *preimage, struct image *postimage, char *buf, @@ -1740,14 +1636,14 @@ static int match_fragment(struct image *img, int match; /* Try fixing the line in the preimage */ - fixlen = copy_wsfix(buf, orig, oldlen, ws_rule, 0); + fixlen = ws_fix_copy(buf, orig, oldlen, ws_rule, NULL); /* Try fixing the line in the target */ if (sizeof(tgtfixbuf) < tgtlen) tgtfix = tgtfixbuf; else tgtfix = xmalloc(tgtlen); - tgtfixlen = copy_wsfix(tgtfix, target, tgtlen, ws_rule, 0); + tgtfixlen = ws_fix_copy(tgtfix, target, tgtlen, ws_rule, NULL); /* * If they match, either the preimage was based on @@ -2006,8 +1902,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, added = plen; } else { - added = copy_wsfix(new, patch + 1, plen, - ws_rule, 1); + added = ws_fix_copy(new, patch + 1, plen, ws_rule, &applied_after_fixing_ws); } add_line_info(&postimage, new, added, (first == '+' ? 0 : LINE_COMMON)); -- cgit v1.2.3