From 947208b725188eb499625ebc5c6e43d54c97e4fc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:11 -0400 Subject: setup_traverse_info(): stop copying oid We assume that if setup_traverse_info() is passed a non-empty "base" string, that string is pointing into a tree object and we can read the object oid by skipping past the trailing NUL. As it turns out, this is not true for either of the two calls, and we may end up reading garbage bytes: 1. In git-merge-tree, our base string is either empty (in which case we'd never run this code), or it comes from our traverse_path() helper. The latter overallocates a buffer by the_hash_algo->rawsz bytes, but then fills it with only make_traverse_path(), leaving those extra bytes uninitialized (but part of a legitimate heap buffer). 2. In unpack_trees(), we pass o->prefix, which is some arbitrary string from the caller. In "git read-tree --prefix=foo", for instance, it will point to the command-line parameter, and we'll read 20 bytes past the end of the string. Interestingly, tools like ASan do not detect (2) because the process argv is part of a big pre-allocated buffer. So we're reading trash, but it's trash that's probably part of the next argument, or the environment. You can convince it to fail by putting something like this at the beginning of common-main.c's main() function: { int i; for (i = 0; i < argc; i++) argv[i] = xstrdup_or_null(argv[i]); } That puts the arguments into their own heap buffers, so running: make SANITIZE=address test will find problems when "read-tree --prefix" is used (e.g., in t3030). Doubly interesting, even with the hackery above, this does not fail prior to ea82b2a085 (tree-walk: store object_id in a separate member, 2019-01-15). That commit switched setup_traverse_info() to actually copying the hash, rather than simply pointing to it. That pointer was always pointing to garbage memory, but that commit started actually dereferencing the bytes, which is what triggers ASan. That also implies that nobody actually cares about reading these oid bytes anyway (or at least no path covered by our tests). And manual inspection of the code backs that up (I'll follow this patch with some cleanups that show definitively this is the case, but they're quite invasive, so it's worth doing this fix on its own). So let's drop the bogus hashcpy(), along with the confusing oversizing in merge-tree. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index ec32a47b2e..ba106152ef 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -177,10 +177,8 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->pathlen = pathlen ? pathlen + 1 : 0; info->name.path = base; info->name.pathlen = pathlen; - if (pathlen) { - hashcpy(info->name.oid.hash, (const unsigned char *)base + pathlen + 1); + if (pathlen) info->prev = &dummy; - } } char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) -- cgit v1.2.3 From 9055384710dd8963b125f4f87c24d8f67d9fa24f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:15 -0400 Subject: tree-walk: drop oid from traverse_info As the previous commit shows, the presence of an oid in each level of the traverse_info is confusing and ultimately not necessary. Let's drop it to make it clear that it will not always be set (as well as convince us that it's unused, and let the compiler catch any merges with other branches that do add new uses). Since the oid is part of name_entry, we'll actually stop embedding a name_entry entirely, and instead just separately hold the pathname, its length, and the mode. This makes the resulting code slightly more verbose as we have to pass those elements around individually. But it also makes it more clear what each code path is going to use (and in most of the paths, we really only care about the pathname itself). A few of these conversions are noisier than they need to be, as they also take the opportunity to rename "len" to "namelen" for clarity (especially where we also have "pathlen" or "ce_len" alongside). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index ba106152ef..4610f77383 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -175,27 +175,27 @@ void setup_traverse_info(struct traverse_info *info, const char *base) if (pathlen && base[pathlen-1] == '/') pathlen--; info->pathlen = pathlen ? pathlen + 1 : 0; - info->name.path = base; - info->name.pathlen = pathlen; + info->name = base; + info->namelen = pathlen; if (pathlen) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) +char *make_traverse_path(char *path, const struct traverse_info *info, + const char *name, size_t namelen) { - int len = tree_entry_len(n); int pathlen = info->pathlen; - path[pathlen + len] = 0; + path[pathlen + namelen] = 0; for (;;) { - memcpy(path + pathlen, n->path, len); + memcpy(path + pathlen, name, namelen); if (!pathlen) break; path[--pathlen] = '/'; - n = &info->name; - len = tree_entry_len(n); + name = info->name; + namelen = info->namelen; info = info->prev; - pathlen -= len; + pathlen -= namelen; } return path; } @@ -397,12 +397,13 @@ int traverse_trees(struct index_state *istate, if (info->prev) { strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, &info->name); + make_traverse_path(base.buf, info->prev, info->name, + info->namelen); base.buf[info->pathlen-1] = '/'; strbuf_setlen(&base, info->pathlen); traverse_path = xstrndup(base.buf, info->pathlen); } else { - traverse_path = xstrndup(info->name.path, info->pathlen); + traverse_path = xstrndup(info->name, info->pathlen); } info->traverse_path = traverse_path; for (;;) { -- cgit v1.2.3 From 37806080d7be1ab5b2fa918f6a528652596ea2c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:18 -0400 Subject: tree-walk: use size_t consistently We store and manipulate the cumulative traverse_info.pathlen as an "int", which can overflow when we are fed ridiculously long pathnames (e.g., ones at the edge of 2GB or 4GB, even if the individual tree entry names are smaller than that). The results can be confusing, though after some prodding I was not able to use this integer overflow to cause an under-allocated buffer. Let's consistently use size_t to generate and store these, and make sure our addition doesn't overflow. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index 4610f77383..70f9eb5f1b 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -168,7 +168,7 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry) void setup_traverse_info(struct traverse_info *info, const char *base) { - int pathlen = strlen(base); + size_t pathlen = strlen(base); static struct traverse_info dummy; memset(info, 0, sizeof(*info)); @@ -184,7 +184,7 @@ void setup_traverse_info(struct traverse_info *info, const char *base) char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen) { - int pathlen = info->pathlen; + size_t pathlen = info->pathlen; path[pathlen + namelen] = 0; for (;;) { -- cgit v1.2.3 From c43ab062598d0299ea6e0d115a6018189a7793bf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:23 -0400 Subject: tree-walk: add a strbuf wrapper for make_traverse_path() All but one of the callers of make_traverse_path() allocate a new heap buffer to store the path. Let's give them an easy way to write to a strbuf, which saves them from computing the length themselves (which is especially tricky when they want to add to the path). It will also make it easier for us to change the make_traverse_path() interface in a future patch to improve its bounds-checking. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index 70f9eb5f1b..c2952f3793 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -200,6 +200,17 @@ char *make_traverse_path(char *path, const struct traverse_info *info, return path; } +void strbuf_make_traverse_path(struct strbuf *out, + const struct traverse_info *info, + const char *name, size_t namelen) +{ + size_t len = traverse_path_len(info, namelen); + + strbuf_grow(out, len); + make_traverse_path(out->buf + out->len, info, name, namelen); + strbuf_setlen(out, out->len + len); +} + struct tree_desc_skip { struct tree_desc_skip *prev; const void *ptr; @@ -396,12 +407,10 @@ int traverse_trees(struct index_state *istate, tx[i].d = t[i]; if (info->prev) { - strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, info->name, - info->namelen); - base.buf[info->pathlen-1] = '/'; - strbuf_setlen(&base, info->pathlen); - traverse_path = xstrndup(base.buf, info->pathlen); + strbuf_make_traverse_path(&base, info->prev, + info->name, info->namelen); + strbuf_addch(&base, '/'); + traverse_path = xstrndup(base.buf, base.len); } else { traverse_path = xstrndup(info->name, info->pathlen); } -- cgit v1.2.3 From 5aa02f98685d78666293149087d3f69b97528cfb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:25 -0400 Subject: tree-walk: harden make_traverse_path() length computations The make_traverse_path() function isn't very careful about checking its output buffer boundaries. In fact, it doesn't even _know_ the size of the buffer it's writing to, and just assumes that the caller used traverse_path_len() correctly. And even then we assume that our traverse_info.pathlen components are all correct, and just blindly write into the buffer. Let's improve this situation a bit: - have the caller pass in their allocated buffer length, which we'll check against our own computations - check for integer underflow as we do our backwards-insertion of pathnames into the buffer - check that we do not run out items in our list to traverse before we've filled the expected number of bytes None of these should be triggerable in practice (especially since our switch to size_t everywhere in a previous commit), but it doesn't hurt to check our assumptions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index c2952f3793..4f1e9d79ab 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -181,21 +181,32 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, +char *make_traverse_path(char *path, size_t pathlen, + const struct traverse_info *info, const char *name, size_t namelen) { - size_t pathlen = info->pathlen; + /* Always points to the end of the name we're about to add */ + size_t pos = st_add(info->pathlen, namelen); - path[pathlen + namelen] = 0; + if (pos >= pathlen) + BUG("too small buffer passed to make_traverse_path"); + + path[pos] = 0; for (;;) { - memcpy(path + pathlen, name, namelen); - if (!pathlen) + if (pos < namelen) + BUG("traverse_info pathlen does not match strings"); + pos -= namelen; + memcpy(path + pos, name, namelen); + + if (!pos) break; - path[--pathlen] = '/'; + path[--pos] = '/'; + + if (!info) + BUG("traverse_info ran out of list items"); name = info->name; namelen = info->namelen; info = info->prev; - pathlen -= namelen; } return path; } @@ -207,7 +218,8 @@ void strbuf_make_traverse_path(struct strbuf *out, size_t len = traverse_path_len(info, namelen); strbuf_grow(out, len); - make_traverse_path(out->buf + out->len, info, name, namelen); + make_traverse_path(out->buf + out->len, out->alloc - out->len, + info, name, namelen); strbuf_setlen(out, out->len + len); } -- cgit v1.2.3