From 47ac23d31483fa8f942117fa5e0c0a70572e9e59 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Aug 2021 18:47:42 -0400 Subject: range-diff: drop useless "offset" variable from read_patches() The "offset" variable was introduced in 44b67cb62b (range-diff: split lines manually, 2019-07-11), but it has never done anything useful. We use it to count up the number of bytes we've consumed, but we never look at the result. It was probably copied accidentally from an almost-identical loop in apply.c:find_header() (and the point of that commit was to make use of the parse_git_diff_header() function which underlies both). Because the variable was set but not used, most compilers didn't seem to notice, but the upcoming clang-14 does complain about it, via its -Wunused-but-set-variable warning. Signed-off-by: Jeff King Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- range-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'range-diff.c') diff --git a/range-diff.c b/range-diff.c index 1a4471fe4c..85ce535336 100644 --- a/range-diff.c +++ b/range-diff.c @@ -49,7 +49,7 @@ static int read_patches(const char *range, struct string_list *list, struct patch_util *util = NULL; int in_header = 1; char *line, *current_filename = NULL; - int offset, len; + int len; size_t size; strvec_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", @@ -86,7 +86,7 @@ static int read_patches(const char *range, struct string_list *list, line = contents.buf; size = contents.len; - for (offset = 0; size > 0; offset += len, size -= len, line += len) { + for (; size > 0; size -= len, line += len) { const char *p; len = find_end_of_line(line, size); -- cgit v1.2.3 From 7c86d365da1fe2e02f568365f0ae45deb2bd412e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Aug 2021 18:48:39 -0400 Subject: range-diff: handle unterminated lines in read_patches() When parsing our buffer of output from git-log, we have a find_end_of_line() helper that finds the next newline, 
and gives us the number of bytes to move past it, or the size of the whole remaining buffer if there is no newline. But trying to handle both those cases leads to some oddities: - we try to overwrite the newline with NUL in the caller, by writing over line[len-1]. This is at best redundant, since the helper will already have done so if it saw a newline. But if it didn't see a newline, it's actively wrong; we'll overwrite the byte at the end of the (unterminated) line. We could solve this by just dropping the extra NUL assignment in the caller and just letting the helper do the right thing. But... - if we see a "diff --git" line, we'll restore the newline on top of the NUL byte, so we can pass the string to parse_git_diff_header(). But if there was no newline in the first place, we can't do this. There's no place to put it (the current code writes a newline over whatever byte we obliterated earlier). The best we can do is feed the complete remainder of the buffer to the function (which is, in fact, a string, by virtue of being a strbuf). To solve this, the caller needs to know whether we actually found a newline or not. We could modify find_end_of_line() to return that information, but we can further observe that it has only one caller. So let's just inline it in that caller. Nobody seems to have noticed this case, probably because git-log would never produce input that doesn't end with a newline. Arguably we could just return an error as soon as we see that the output does not end in a newline. But the code to do so actually ends up _longer_, mostly because of the cleanup we have to do in handling the error. 
Signed-off-by: Jeff King Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- range-diff.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'range-diff.c') diff --git a/range-diff.c b/range-diff.c index 85ce535336..87e82ee859 100644 --- a/range-diff.c +++ b/range-diff.c @@ -26,17 +26,6 @@ struct patch_util { struct object_id oid; }; -static size_t find_end_of_line(char *buffer, unsigned long size) -{ - char *eol = memchr(buffer, '\n', size); - - if (!eol) - return size; - - *eol = '\0'; - return eol + 1 - buffer; -} - /* * Reads the patches into a string list, with the `util` field being populated * as struct object_id (will need to be free()d). @@ -88,9 +77,16 @@ static int read_patches(const char *range, struct string_list *list, size = contents.len; for (; size > 0; size -= len, line += len) { const char *p; + char *eol; + + eol = memchr(line, '\n', size); + if (eol) { + *eol = '\0'; + len = eol + 1 - line; + } else { + len = size; + } - len = find_end_of_line(line, size); - line[len - 1] = '\0'; if (skip_prefix(line, "commit ", &p)) { if (util) { string_list_append(list, buf.buf)->util = util; @@ -132,7 +128,8 @@ static int read_patches(const char *range, struct string_list *list, strbuf_addch(&buf, '\n'); if (!util->diff_offset) util->diff_offset = buf.len; - line[len - 1] = '\n'; + if (eol) + *eol = '\n'; orig_len = len; len = parse_git_diff_header(&root, &linenr, 0, line, len, size, &patch); -- cgit v1.2.3 From c4d5907324394228e08a42589a044fa14d7ffdcc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Aug 2021 18:48:48 -0400 Subject: range-diff: use ssize_t for parsed "len" in read_patches() As we iterate through the buffer containing git-log output, parsing lines, we use an "int" to store the size of an individual line. This should be a size_t, as we have no guarantee that there is not a malicious 2GB+ commit-message line in the output. Overflowing this integer probably doesn't do anything _too_ terrible. 
We are not using the value to size a buffer, so the worst case is probably an out-of-bounds read from before the array. But it's easy enough to fix. Note that we have to use ssize_t here, since we also store the length result from parse_git_diff_header(), which may return a negative value for error. That function actually returns an int itself, which has a similar overflow problem, but I'll leave that for another day. Much of the apply.c code uses ints and should be converted as a whole; in the meantime, a negative return from parse_git_diff_header() will be interpreted as an error, and we'll bail (so we can't handle such a case, but given that it's likely to be malicious anyway, the important thing is we don't have any memory errors). Signed-off-by: Jeff King Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- range-diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'range-diff.c') diff --git a/range-diff.c b/range-diff.c index 87e82ee859..012b4ea6d2 100644 --- a/range-diff.c +++ b/range-diff.c @@ -38,7 +38,7 @@ static int read_patches(const char *range, struct string_list *list, struct patch_util *util = NULL; int in_header = 1; char *line, *current_filename = NULL; - int len; + ssize_t len; size_t size; strvec_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", -- cgit v1.2.3