diff options
author | Campbell Barton <ideasman42@gmail.com> | 2021-08-24 06:25:26 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2021-08-24 07:25:15 +0300 |
commit | 8b55cda04812255922f488ad6bacd228d5d290a6 (patch) | |
tree | 9d45f358d6cc1835c461763fdc0c83872854c331 /source/blender/blenlib/tests | |
parent | 8371df8b1c12bdae574370d77819c46d8280f20f (diff) |
Fix BLI_str_utf8_as_unicode_step reading past intended bounds
Add a string length argument to BLI_str_utf8_as_unicode_step to prevent
reading past the buffer bounds or the intended range since some callers
of this function take a string length to operate on part of the string.
Font drawing for example didn't respect the length argument,
potentially causing a buffer over-read with multi-byte characters
that could read past the end of the string.
The following command would read 5 bytes past the end of the input.
`BLF_draw(font_id, (char[]){252}, 1);`
In practice strings are typically null terminated so this didn't crash
reading past buffer bounds.
Nevertheless, this wasn't correct and could cause bugs in the future.
Clamping by the length now has the same behavior as a null byte.
Add test to ensure this is working as intended.
Diffstat (limited to 'source/blender/blenlib/tests')
-rw-r--r-- | source/blender/blenlib/tests/BLI_string_utf8_test.cc | 111 |
1 files changed, 110 insertions, 1 deletions
diff --git a/source/blender/blenlib/tests/BLI_string_utf8_test.cc b/source/blender/blenlib/tests/BLI_string_utf8_test.cc index 13f5cb7f284..b25f2310e1e 100644 --- a/source/blender/blenlib/tests/BLI_string_utf8_test.cc +++ b/source/blender/blenlib/tests/BLI_string_utf8_test.cc @@ -2,6 +2,7 @@ #include "testing/testing.h" +#include "BLI_rand.h" #include "BLI_string.h" #include "BLI_string_utf8.h" #include "BLI_utildefines.h" @@ -11,7 +12,8 @@ * quite their share of lines, they deserved their own file. */ /* -------------------------------------------------------------------- */ -/* tests */ +/** \name Test #BLI_str_utf8_invalid_strip + * \{ */ /* Breaking strings is confusing here, prefer over-long lines. */ /* clang-format off */ @@ -284,3 +286,110 @@ TEST(string, Utf8InvalidBytes) EXPECT_STREQ(buff, tst_stripped); } } + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Test #BLI_str_utf8_as_unicode_step + * \{ */ + +static size_t utf8_as_char32(const char *str, const char str_len, char32_t *r_result) +{ + size_t i = 0, result_len = 0; + while ((i < str_len) && (str[i] != '\0')) { + char32_t c = BLI_str_utf8_as_unicode_step(str, str_len, &i); + if (c != BLI_UTF8_ERR) { + r_result[result_len++] = c; + } + } + return i; +} + +template<size_t Size, size_t SizeWithPadding> +void utf8_as_char32_test_compare_with_pad_bytes(const char utf8_src[Size]) +{ + char utf8_src_with_pad[SizeWithPadding] = {0}; + + memcpy(utf8_src_with_pad, utf8_src, Size); + + char32_t unicode_dst_a[Size], unicode_dst_b[Size]; + + memset(unicode_dst_a, 0xff, sizeof(unicode_dst_a)); + const size_t index_a = utf8_as_char32(utf8_src, Size, unicode_dst_a); + + /* Test with padded and un-padded size, + * to ensure that extra available space doesn't yield a different result. */ + for (int pass = 0; pass < 2; pass++) { + memset(unicode_dst_b, 0xff, sizeof(unicode_dst_b)); + const size_t index_b = utf8_as_char32( + utf8_src_with_pad, pass ? Size : SizeWithPadding, unicode_dst_b); + + /* Check the resulting content matches. */ + EXPECT_EQ_ARRAY(unicode_dst_a, unicode_dst_b, Size); + /* Check the index of the source strings match. */ + EXPECT_EQ(index_a, index_b); + } +} + +template<size_t Size> void utf8_as_char32_test_compare(const char utf8_src[Size]) +{ + /* Note that 7 is a little arbitrary, + * chosen since it's the maximum length of multi-byte character + 1 + * to account for any errors that read past null bytes. */ + utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 1>(utf8_src); + utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 7>(utf8_src); +} + +template<size_t Size> void utf8_as_char32_test_at_buffer_size() +{ + char utf8_src[Size]; + + /* Test uniform bytes, also with offsets ascending & descending. */ + for (int i = 0; i <= 0xff; i++) { + memset(utf8_src, i, sizeof(utf8_src)); + utf8_as_char32_test_compare<Size>(utf8_src); + + /* Offset trailing bytes up and down in steps of 1, 2, 4 .. etc. */ + if (Size > 1) { + for (int mul = 1; mul < 256; mul *= 2) { + for (int ofs = 1; ofs < (int)Size; ofs++) { + utf8_src[ofs] = (char)(i + (ofs * mul)); + } + utf8_as_char32_test_compare<Size>(utf8_src); + + for (int ofs = 1; ofs < (int)Size; ofs++) { + utf8_src[ofs] = (char)(i - (ofs * mul)); + } + utf8_as_char32_test_compare<Size>(utf8_src); + } + } + } + + /* Random bytes. */ + RNG *rng = BLI_rng_new(1); + for (int i = 0; i < 256; i++) { + BLI_rng_get_char_n(rng, utf8_src, sizeof(utf8_src)); + utf8_as_char32_test_compare<Size>(utf8_src); + } + BLI_rng_free(rng); +} + +TEST(string, Utf8AsUnicodeStep) +{ + + /* Run tests at different buffer sizes. */ + utf8_as_char32_test_at_buffer_size<1>(); + utf8_as_char32_test_at_buffer_size<2>(); + utf8_as_char32_test_at_buffer_size<3>(); + utf8_as_char32_test_at_buffer_size<4>(); + utf8_as_char32_test_at_buffer_size<5>(); + utf8_as_char32_test_at_buffer_size<6>(); + utf8_as_char32_test_at_buffer_size<7>(); + utf8_as_char32_test_at_buffer_size<8>(); + utf8_as_char32_test_at_buffer_size<9>(); + utf8_as_char32_test_at_buffer_size<10>(); + utf8_as_char32_test_at_buffer_size<11>(); + utf8_as_char32_test_at_buffer_size<12>(); +} + +/** \} */ |