From eb22e7dfa23da6bd9aed9bd1dad69e1e8e167d24 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:15 +0100 Subject: attr: fix overflow when upserting attribute with overly long name The function `git_attr_internal()` is called to upsert attributes into the global map. And while all callers pass a `size_t`, the function itself accepts an `int` as the attribute name's length. This can lead to an integer overflow in case the attribute name is longer than `INT_MAX`. Now this overflow seems harmless as the first thing we do is to call `attr_name_valid()`, and that function only succeeds in case all chars in the range of `namelen` match a certain small set of chars. We thus can't do an out-of-bounds read as NUL is not part of that set and all strings passed to this function are NUL-terminated. And furthermore, we wouldn't ever read past the current attribute name anyway due to the same reason. And if validation fails we will return early. On the other hand it feels fragile to rely on this behaviour, even more so given that we pass `namelen` to `FLEX_ALLOC_MEM()`. So let's instead just do the correct thing here and accept a `size_t` as line length. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 4ef85d668b..39ce0eb95e 100644 --- a/attr.c +++ b/attr.c @@ -210,7 +210,7 @@ static void report_invalid_attr(const char *name, size_t len, * dictionary. If no entry is found, create a new attribute and store it in * the dictionary. */ -static const struct git_attr *git_attr_internal(const char *name, int namelen) +static const struct git_attr *git_attr_internal(const char *name, size_t namelen) { struct git_attr *a; -- cgit v1.2.3 From 8d0d48cf2157cfb914db1f53b3fe40785b86f3aa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:19 +0100 Subject: attr: fix out-of-bounds read with huge attribute names There is an out-of-bounds read possible when parsing gitattributes that have an attribute that is 2^31+1 bytes long. This is caused due to an integer overflow when we assign the result of strlen(3P) to an `int`, where we use the wrapped-around value in a subsequent call to memcpy(3P). The following code reproduces the issue: blob=$(perl -e 'print "a" x 2147483649 . " attr"' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git check-attr --all file AddressSanitizer:DEADLYSIGNAL ================================================================= ==8451==ERROR: AddressSanitizer: SEGV on unknown address 0x7f93efa00800 (pc 0x7f94f1f8f082 bp 0x7ffddb59b3a0 sp 0x7ffddb59ab28 T0) ==8451==The signal is caused by a READ memory access. #0 0x7f94f1f8f082 (/usr/lib/libc.so.6+0x176082) #1 0x7f94f2047d9c in __interceptor_strspn /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:752 #2 0x560e190f7f26 in parse_attr_line attr.c:375 #3 0x560e190f9663 in handle_attr_line attr.c:660 #4 0x560e190f9ddd in read_attr_from_index attr.c:769 #5 0x560e190f9f14 in read_attr attr.c:797 #6 0x560e190fa24e in bootstrap_attr_stack attr.c:867 #7 0x560e190fa4a5 in prepare_attr_stack attr.c:902 #8 0x560e190fb5dc in collect_some_attrs attr.c:1097 #9 0x560e190fb93f in git_all_attrs attr.c:1128 #10 0x560e18e6136e in check_attr builtin/check-attr.c:67 #11 0x560e18e61c12 in cmd_check_attr builtin/check-attr.c:183 #12 0x560e18e15993 in run_builtin git.c:466 #13 0x560e18e16397 in handle_builtin git.c:721 #14 0x560e18e16b2b in run_argv git.c:788 #15 0x560e18e17991 in cmd_main git.c:926 #16 0x560e190ae2bd in main common-main.c:57 #17 0x7f94f1e3c28f (/usr/lib/libc.so.6+0x2328f) #18 0x7f94f1e3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #19 0x560e18e110e4 in _start ../sysdeps/x86_64/start.S:115 AddressSanitizer can not provide additional info. SUMMARY: AddressSanitizer: SEGV (/usr/lib/libc.so.6+0x176082) ==8451==ABORTING Fix this bug by converting the variable to a `size_t` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 39ce0eb95e..9d42bc1721 100644 --- a/attr.c +++ b/attr.c @@ -333,7 +333,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - int namelen; + size_t namelen; int num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; -- cgit v1.2.3 From 24557209500e6ed618f04a8795a111a0c491a29c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:23 +0100 Subject: attr: fix integer overflow when parsing huge attribute names It is possible to trigger an integer overflow when parsing attribute names that are longer than 2^31 bytes because we assign the result of strlen(3P) to an `int` instead of to a `size_t`. This can lead to an abort in vsnprintf(3P) with the following reproducer: blob=$(perl -e 'print "A " . "B"x2147483648 . "\n"' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git check-attr --all path BUG: strbuf.c:400: your vsnprintf is broken (returned -1) But furthermore, assuming that the attribute name is even longer than that, it can cause us to silently truncate the attribute and thus lead to wrong results. Fix this integer overflow by using a `size_t` instead. This fixes the silent truncation of attribute names, but it only partially fixes the BUG we hit: even though the initial BUG is fixed, we can still hit a BUG when parsing invalid attribute lines via `report_invalid_attr()`. This is due to an underlying design issue in vsnprintf(3P) which only knows to return an `int`, and thus it may always overflow with large inputs. This issue is benign though: the worst that can happen is that the error message is misreported to be either truncated or too long, but due to the buffer being NUL terminated we wouldn't ever do an out-of-bounds read here. Reported-by: Markus Vervier Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 9d42bc1721..4a10ba4d94 100644 --- a/attr.c +++ b/attr.c @@ -289,7 +289,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, struct attr_state *e) { const char *ep, *equals; - int len; + size_t len; ep = cp + strcspn(cp, blank); equals = strchr(cp, '='); -- cgit v1.2.3 From 34ace8bad02bb14ecc5b631f7e3daaa7a9bba7d9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:27 +0100 Subject: attr: fix out-of-bounds write when parsing huge number of attributes It is possible to trigger an integer overflow when parsing attribute names when there are more than 2^31 of them for a single pattern. This can either lead to us dying due to trying to request too many bytes: blob=$(perl -e 'print "f" . " a=" x 2147483649' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git attr-check --all file ================================================================= ==1022==ERROR: AddressSanitizer: requested allocation size 0xfffffff800000032 (0xfffffff800001038 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0) #0 0x7fd3efabf411 in __interceptor_calloc /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:77 #1 0x5563a0a1e3d3 in xcalloc wrapper.c:150 #2 0x5563a058d005 in parse_attr_line attr.c:384 #3 0x5563a058e661 in handle_attr_line attr.c:660 #4 0x5563a058eddb in read_attr_from_index attr.c:769 #5 0x5563a058ef12 in read_attr attr.c:797 #6 0x5563a058f24c in bootstrap_attr_stack attr.c:867 #7 0x5563a058f4a3 in prepare_attr_stack attr.c:902 #8 0x5563a05905da in collect_some_attrs attr.c:1097 #9 0x5563a059093d in git_all_attrs attr.c:1128 #10 0x5563a02f636e in check_attr builtin/check-attr.c:67 #11 0x5563a02f6c12 in cmd_check_attr builtin/check-attr.c:183 #12 0x5563a02aa993 in run_builtin git.c:466 #13 0x5563a02ab397 in handle_builtin git.c:721 #14 0x5563a02abb2b in run_argv git.c:788 #15 0x5563a02ac991 in cmd_main git.c:926 #16 0x5563a05432bd in main common-main.c:57 #17 0x7fd3ef82228f (/usr/lib/libc.so.6+0x2328f) ==1022==HINT: if you don't care about these errors you may set allocator_may_return_null=1 SUMMARY: AddressSanitizer: allocation-size-too-big /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:77 in __interceptor_calloc ==1022==ABORTING Or, much worse, it can lead to an out-of-bounds write because we underallocate and then memcpy(3P) into an array: perl -e ' print "A " . "\rh="x2000000000; print "\rh="x2000000000; print "\rh="x294967294 . "\n" ' >.gitattributes git add .gitattributes git commit -am "evil attributes" $ git clone --quiet /path/to/repo ================================================================= ==15062==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000002550 at pc 0x5555559884d5 bp 0x7fffffffbc60 sp 0x7fffffffbc58 WRITE of size 8 at 0x602000002550 thread T0 #0 0x5555559884d4 in parse_attr_line attr.c:393 #1 0x5555559884d4 in handle_attr_line attr.c:660 #2 0x555555988902 in read_attr_from_index attr.c:784 #3 0x555555988902 in read_attr_from_index attr.c:747 #4 0x555555988a1d in read_attr attr.c:800 #5 0x555555989b0c in bootstrap_attr_stack attr.c:882 #6 0x555555989b0c in prepare_attr_stack attr.c:917 #7 0x555555989b0c in collect_some_attrs attr.c:1112 #8 0x55555598b141 in git_check_attr attr.c:1126 #9 0x555555a13004 in convert_attrs convert.c:1311 #10 0x555555a95e04 in checkout_entry_ca entry.c:553 #11 0x555555d58bf6 in checkout_entry entry.h:42 #12 0x555555d58bf6 in check_updates unpack-trees.c:480 #13 0x555555d5eb55 in unpack_trees unpack-trees.c:2040 #14 0x555555785ab7 in checkout builtin/clone.c:724 #15 0x555555785ab7 in cmd_clone builtin/clone.c:1384 #16 0x55555572443c in run_builtin git.c:466 #17 0x55555572443c in handle_builtin git.c:721 #18 0x555555727872 in run_argv git.c:788 #19 0x555555727872 in cmd_main git.c:926 #20 0x555555721fa0 in main common-main.c:57 #21 0x7ffff73f1d09 in __libc_start_main ../csu/libc-start.c:308 #22 0x555555723f39 in _start (git+0x1cff39) 0x602000002552 is located 0 bytes to the right of 2-byte region [0x602000002550,0x602000002552) allocated by thread T0 here: #0 0x7ffff768c037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x555555d7fff7 in xcalloc wrapper.c:150 #2 0x55555598815f in parse_attr_line attr.c:384 #3 0x55555598815f in handle_attr_line attr.c:660 #4 0x555555988902 in read_attr_from_index attr.c:784 #5 0x555555988902 in read_attr_from_index attr.c:747 #6 0x555555988a1d in read_attr attr.c:800 #7 0x555555989b0c in bootstrap_attr_stack attr.c:882 #8 0x555555989b0c in prepare_attr_stack attr.c:917 #9 0x555555989b0c in collect_some_attrs attr.c:1112 #10 0x55555598b141 in git_check_attr attr.c:1126 #11 0x555555a13004 in convert_attrs convert.c:1311 #12 0x555555a95e04 in checkout_entry_ca entry.c:553 #13 0x555555d58bf6 in checkout_entry entry.h:42 #14 0x555555d58bf6 in check_updates unpack-trees.c:480 #15 0x555555d5eb55 in unpack_trees unpack-trees.c:2040 #16 0x555555785ab7 in checkout builtin/clone.c:724 #17 0x555555785ab7 in cmd_clone builtin/clone.c:1384 #18 0x55555572443c in run_builtin git.c:466 #19 0x55555572443c in handle_builtin git.c:721 #20 0x555555727872 in run_argv git.c:788 #21 0x555555727872 in cmd_main git.c:926 #22 0x555555721fa0 in main common-main.c:57 #23 0x7ffff73f1d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: heap-buffer-overflow attr.c:393 in parse_attr_line Shadow bytes around the buggy address: 0x0c047fff8450: fa fa 00 02 fa fa 00 07 fa fa fd fd fa fa 00 00 0x0c047fff8460: fa fa 02 fa fa fa fd fd fa fa 00 06 fa fa 05 fa 0x0c047fff8470: fa fa fd fd fa fa 00 02 fa fa 06 fa fa fa 05 fa 0x0c047fff8480: fa fa 07 fa fa fa fd fd fa fa 00 01 fa fa 00 02 0x0c047fff8490: fa fa 00 03 fa fa 00 fa fa fa 00 01 fa fa 00 03 =>0x0c047fff84a0: fa fa 00 01 fa fa 00 02 fa fa[02]fa fa fa fa fa 0x0c047fff84b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84d0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84e0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Shadow gap: cc ==15062==ABORTING Fix this bug by using `size_t` instead to count the number of attributes so that this value cannot reasonably overflow without running out of memory before already. Reported-by: Markus Vervier Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 4a10ba4d94..525f6da201 100644 --- a/attr.c +++ b/attr.c @@ -272,7 +272,7 @@ struct match_attr { const struct git_attr *attr; } u; char is_macro; - unsigned num_attr; + size_t num_attr; struct attr_state state[FLEX_ARRAY]; }; @@ -333,8 +333,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - size_t namelen; - int num_attr, i; + size_t namelen, num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; int is_macro; @@ -451,7 +450,8 @@ static void attr_stack_free(struct attr_stack *e) free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; - int j; + size_t j; + for (j = 0; j < a->num_attr; j++) { const char *setto = a->state[j].setto; if (setto == ATTR__TRUE || @@ -1001,12 +1001,12 @@ static int macroexpand_one(struct all_attrs_item *all_attrs, int nr, int rem); static int fill_one(const char *what, struct all_attrs_item *all_attrs, const struct match_attr *a, int rem) { - int i; + size_t i; - for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) { - const struct git_attr *attr = a->state[i].attr; + for (i = a->num_attr; rem > 0 && i > 0; i--) { + const struct git_attr *attr = a->state[i - 1].attr; const char **n = &(all_attrs[attr->attr_nr].value); - const char *v = a->state[i].setto; + const char *v = a->state[i - 1].setto; if (*n == ATTR__UNKNOWN) { debug_set(what, -- cgit v1.2.3 From 447ac906e189535e77dcb1f4bbe3f1bc917d4c12 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:31 +0100 Subject: attr: fix out-of-bounds read with unreasonable amount of patterns The `struct attr_stack` tracks the stack of all patterns together with their attributes. When parsing a gitattributes file that has more than 2^31 such patterns though we may trigger multiple out-of-bounds reads on 64 bit platforms. This is because while the `num_matches` variable is an unsigned integer, we always use a signed integer to iterate over them. I have not been able to reproduce this issue due to memory constraints on my systems. But despite the out-of-bounds reads, the worst thing that can seemingly happen is to call free(3P) with a garbage pointer when calling `attr_stack_free()`. Fix this bug by using unsigned integers to iterate over the array. While this makes the iteration somewhat awkward when iterating in reverse, it is at least better than knowingly running into an out-of-bounds read. While at it, convert the call to `ALLOC_GROW` to use `ALLOC_GROW_BY` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 525f6da201..98c231d675 100644 --- a/attr.c +++ b/attr.c @@ -446,7 +446,7 @@ struct attr_stack { static void attr_stack_free(struct attr_stack *e) { - int i; + unsigned i; free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; @@ -660,8 +660,8 @@ static void handle_attr_line(struct attr_stack *res, a = parse_attr_line(line, src, lineno, macro_ok); if (!a) return; - ALLOC_GROW(res->attrs, res->num_matches + 1, res->alloc); - res->attrs[res->num_matches++] = a; + ALLOC_GROW_BY(res->attrs, res->num_matches, 1, res->alloc); + res->attrs[res->num_matches - 1] = a; } static struct attr_stack *read_attr_from_array(const char **list) @@ -1025,11 +1025,11 @@ static int fill(const char *path, int pathlen, int basename_offset, struct all_attrs_item *all_attrs, int rem) { for (; rem > 0 && stack; stack = stack->prev) { - int i; + unsigned i; const char *base = stack->origin ? stack->origin : ""; - for (i = stack->num_matches - 1; 0 < rem && 0 <= i; i--) { - const struct match_attr *a = stack->attrs[i]; + for (i = stack->num_matches; 0 < rem && 0 < i; i--) { + const struct match_attr *a = stack->attrs[i - 1]; if (a->is_macro) continue; if (path_matches(path, pathlen, basename_offset, @@ -1060,9 +1060,9 @@ static void determine_macros(struct all_attrs_item *all_attrs, const struct attr_stack *stack) { for (; stack; stack = stack->prev) { - int i; - for (i = stack->num_matches - 1; i >= 0; i--) { - const struct match_attr *ma = stack->attrs[i]; + unsigned i; + for (i = stack->num_matches; i > 0; i--) { + const struct match_attr *ma = stack->attrs[i - 1]; if (ma->is_macro) { int n = ma->u.attr->attr_nr; if (!all_attrs[n].macro) { -- cgit v1.2.3 From e1e12e97ac73ded85f7d000da1063a774b3cc14f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:36 +0100 Subject: attr: fix integer overflow with more than INT_MAX macros Attributes have a field that tracks the position in the `all_attrs` array they're stored inside. This field gets set via `hashmap_get_size` when adding the attribute to the global map of attributes. But while the field is of type `int`, the value returned by `hashmap_get_size` is an `unsigned int`. It can thus happen that the value overflows, where we would now dereference teh `all_attrs` array at an out-of-bounds value. We do have a sanity check for this overflow via an assert that verifies the index matches the new hashmap's size. But asserts are not a proper mechanism to detect against any such overflows as they may not in fact be compiled into production code. Fix this by using an `unsigned int` to track the index and convert the assert to a call `die()`. Reported-by: Jeff King Signed-off-by: Junio C Hamano --- attr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 98c231d675..d1faf69083 100644 --- a/attr.c +++ b/attr.c @@ -28,7 +28,7 @@ static const char git_attr__unknown[] = "(builtin)unknown"; #endif struct git_attr { - int attr_nr; /* unique attribute number */ + unsigned int attr_nr; /* unique attribute number */ char name[FLEX_ARRAY]; /* attribute name */ }; @@ -226,8 +226,8 @@ static const struct git_attr *git_attr_internal(const char *name, size_t namelen a->attr_nr = hashmap_get_size(&g_attr_hashmap.map); attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a); - assert(a->attr_nr == - (hashmap_get_size(&g_attr_hashmap.map) - 1)); + if (a->attr_nr != hashmap_get_size(&g_attr_hashmap.map) - 1) + die(_("unable to add additional attribute")); } hashmap_unlock(&g_attr_hashmap); @@ -1064,7 +1064,7 @@ static void determine_macros(struct all_attrs_item *all_attrs, for (i = stack->num_matches; i > 0; i--) { const struct match_attr *ma = stack->attrs[i - 1]; if (ma->is_macro) { - int n = ma->u.attr->attr_nr; + unsigned int n = ma->u.attr->attr_nr; if (!all_attrs[n].macro) { all_attrs[n].macro = ma; } @@ -1116,7 +1116,7 @@ void git_check_attr(const struct index_state *istate, collect_some_attrs(istate, path, check); for (i = 0; i < check->nr; i++) { - size_t n = check->items[i].attr->attr_nr; + unsigned int n = check->items[i].attr->attr_nr; const char *value = check->all_attrs[n].value; if (value == ATTR__UNKNOWN) value = ATTR__UNSET; -- cgit v1.2.3 From a60a66e409c265b2944f18bf43581c146812586d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:40 +0100 Subject: attr: harden allocation against integer overflows When parsing an attributes line, we need to allocate an array that holds all attributes specified for the given file pattern. The calculation to determine the number of bytes that need to be allocated was prone to an overflow though when there was an unreasonable amount of attributes. Harden the allocation by instead using the `st_` helper functions that cause us to die when we hit an integer overflow. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index d1faf69083..a9f7063cfc 100644 --- a/attr.c +++ b/attr.c @@ -380,10 +380,9 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, goto fail_return; } - res = xcalloc(1, - sizeof(*res) + - sizeof(struct attr_state) * num_attr + - (is_macro ? 0 : namelen + 1)); + res = xcalloc(1, st_add3(sizeof(*res), + st_mult(sizeof(struct attr_state), num_attr), + is_macro ? 0 : namelen + 1)); if (is_macro) { res->u.attr = git_attr_internal(name, namelen); } else { -- cgit v1.2.3 From d74b1fd54fdbc45966d12ea907dece11e072fb2b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:44 +0100 Subject: attr: fix silently splitting up lines longer than 2048 bytes When reading attributes from a file we use fgets(3P) with a buffer size of 2048 bytes. This means that as soon as a line exceeds the buffer size we split it up into multiple parts and parse each of them as a separate pattern line. This is of course not what the user intended, and even worse the behaviour is inconsistent with how we read attributes from the index. Fix this bug by converting the code to use `strbuf_getline()` instead. This will indeed read in the whole line, which may theoretically lead to an out-of-memory situation when the gitattributes file is huge. We're about to reject any gitattributes files larger than 100MB in the next commit though, which makes this less of a concern. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index a9f7063cfc..41657479ff 100644 --- a/attr.c +++ b/attr.c @@ -699,21 +699,22 @@ void git_attr_set_direction(enum git_attr_direction new_direction) static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) { + struct strbuf buf = STRBUF_INIT; FILE *fp = fopen_or_warn(path, "r"); struct attr_stack *res; - char buf[2048]; int lineno = 0; if (!fp) return NULL; res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) { - char *bufp = buf; - if (!lineno) - skip_utf8_bom(&bufp, strlen(bufp)); - handle_attr_line(res, bufp, path, ++lineno, macro_ok); + while (strbuf_getline(&buf, fp) != EOF) { + if (!lineno && starts_with(buf.buf, utf8_bom)) + strbuf_remove(&buf, 0, strlen(utf8_bom)); + handle_attr_line(res, buf.buf, path, ++lineno, macro_ok); } + fclose(fp); + strbuf_release(&buf); return res; } -- cgit v1.2.3 From dfa6b32b5e599d97448337ed4fc18dd50c90758f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:48 +0100 Subject: attr: ignore attribute lines exceeding 2048 bytes There are two different code paths to read gitattributes: once via a file, and once via the index. These two paths used to behave differently because when reading attributes from a file, we used fgets(3P) with a buffer size of 2kB. Consequentially, we silently truncate line lengths when lines are longer than that and will then parse the remainder of the line as a new pattern. It goes without saying that this is entirely unexpected, but it's even worse that the behaviour depends on how the gitattributes are parsed. While this is simply wrong, the silent truncation saves us with the recently discovered vulnerabilities that can cause out-of-bound writes or reads with unreasonably long lines due to integer overflows. As the common path is to read gitattributes via the worktree file instead of via the index, we can assume that any gitattributes file that had lines longer than that is already broken anyway. So instead of lifting the limit here, we can double down on it to fix the vulnerabilities. Introduce an explicit line length limit of 2kB that is shared across all paths that read attributes and ignore any line that hits this limit while printing a warning. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 41657479ff..38ecd2fff3 100644 --- a/attr.c +++ b/attr.c @@ -344,6 +344,11 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, return NULL; name = cp; + if (strlen(line) >= ATTR_MAX_LINE_LENGTH) { + warning(_("ignoring overly long attributes line %d"), lineno); + return NULL; + } + if (*cp == '"' && !unquote_c_style(&pattern, name, &states)) { name = pattern.buf; namelen = pattern.len; -- cgit v1.2.3 From 3c50032ff5289cc45659f21949c8d09e52164579 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:53 +0100 Subject: attr: ignore overly large gitattributes files Similar as with the preceding commit, start ignoring gitattributes files that are overly large to protect us against out-of-bounds reads and writes caused by integer overflows. Unfortunately, we cannot just define "overly large" in terms of any preexisting limits in the codebase. Instead, we choose a very conservative limit of 100MB. This is plenty of room for specifying gitattributes, and incidentally it is also the limit for blob sizes for GitHub. While we don't want GitHub to dictate limits here, it is still sensible to use this fact for an informed decision given that it is hosting a huge set of repositories. Furthermore, over at GitLab we scanned a subset of repositories for their root-level attribute files. We found that 80% of them have a gitattributes file smaller than 100kB, 99.99% have one smaller than 1MB, and only a single repository had one that was almost 3MB in size. So enforcing a limit of 100MB seems to give us ample of headroom. With this limit in place we can be reasonably sure that there is no easy way to exploit the gitattributes file via integer overflows anymore. Furthermore, it protects us against resource exhaustion caused by allocating the in-memory data structures required to represent the parsed attributes. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'attr.c') diff --git a/attr.c b/attr.c index 38ecd2fff3..f9316d14ba 100644 --- a/attr.c +++ b/attr.c @@ -708,10 +708,25 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) FILE *fp = fopen_or_warn(path, "r"); struct attr_stack *res; int lineno = 0; + int fd; + struct stat st; if (!fp) return NULL; - res = xcalloc(1, sizeof(*res)); + + fd = fileno(fp); + if (fstat(fd, &st)) { + warning_errno(_("cannot fstat gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + if (st.st_size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + + CALLOC_ARRAY(res, 1); while (strbuf_getline(&buf, fp) != EOF) { if (!lineno && starts_with(buf.buf, utf8_bom)) strbuf_remove(&buf, 0, strlen(utf8_bom)); @@ -730,13 +745,18 @@ static struct attr_stack *read_attr_from_index(const struct index_state *istate, struct attr_stack *res; char *buf, *sp; int lineno = 0; + size_t size; if (!istate) return NULL; - buf = read_blob_data_from_index(istate, path, NULL); + buf = read_blob_data_from_index(istate, path, &size); if (!buf) return NULL; + if (size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes blob '%s'"), path); + return NULL; + } res = xcalloc(1, sizeof(*res)); for (sp = buf; *sp; ) { -- cgit v1.2.3