Fix BLI_str_utf8_as_unicode_step reading past intended bounds

Add a string length argument to BLI_str_utf8_as_unicode_step to prevent reading past the buffer bounds or the intended range since some callers of this function take a string length to operate on part of the string. Font drawing for example didn't respect the length argument, potentially causing a buffer over-read with multi-byte characters that could read past the end of the string. The following command would read 5 bytes past the end of the input. `BLF_draw(font_id, (char[]){252}, 1);` In practice strings are typically null terminated so this didn't crash reading past buffer bounds. Nevertheless, this wasn't correct and could cause bugs in the future. Clamping by the length now has the same behavior as a null byte. Add test to ensure this is working as intended.
author: Campbell Barton <ideasman42@gmail.com> 2021-08-24 06:25:26 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2021-08-24 07:25:15 +0300
commit: 8b55cda04812255922f488ad6bacd228d5d290a6 (patch)
tree: 9d45f358d6cc1835c461763fdc0c83872854c331 /source/blender/blenlib/tests
parent: 8371df8b1c12bdae574370d77819c46d8280f20f (diff)
1 files changed, 110 insertions, 1 deletions
diff --git a/source/blender/blenlib/tests/BLI_string_utf8_test.cc b/source/blender/blenlib/tests/BLI_string_utf8_test.cc
index 13f5cb7f284..b25f2310e1e 100644
--- a/source/blender/blenlib/tests/BLI_string_utf8_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_utf8_test.cc
@@ -2,6 +2,7 @@
 
 #include "testing/testing.h"
 
+#include "BLI_rand.h"
 #include "BLI_string.h"
 #include "BLI_string_utf8.h"
 #include "BLI_utildefines.h"
@@ -11,7 +12,8 @@
  * quite their share of lines, they deserved their own file. */
 
 /* -------------------------------------------------------------------- */
-/* tests */
+/** \name Test #BLI_str_utf8_invalid_strip
+ * \{ */
 
 /* Breaking strings is confusing here, prefer over-long lines. */
 /* clang-format off */
@@ -284,3 +286,110 @@ TEST(string, Utf8InvalidBytes)
     EXPECT_STREQ(buff, tst_stripped);
   }
 }
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Test #BLI_str_utf8_as_unicode_step
+ * \{ */
+
+static size_t utf8_as_char32(const char *str, const char str_len, char32_t *r_result)
+{
+  size_t i = 0, result_len = 0;
+  while ((i < str_len) && (str[i] != '\0')) {
+    char32_t c = BLI_str_utf8_as_unicode_step(str, str_len, &i);
+    if (c != BLI_UTF8_ERR) {
+      r_result[result_len++] = c;
+    }
+  }
+  return i;
+}
+
+template<size_t Size, size_t SizeWithPadding>
+void utf8_as_char32_test_compare_with_pad_bytes(const char utf8_src[Size])
+{
+  char utf8_src_with_pad[SizeWithPadding] = {0};
+
+  memcpy(utf8_src_with_pad, utf8_src, Size);
+
+  char32_t unicode_dst_a[Size], unicode_dst_b[Size];
+
+  memset(unicode_dst_a, 0xff, sizeof(unicode_dst_a));
+  const size_t index_a = utf8_as_char32(utf8_src, Size, unicode_dst_a);
+
+  /* Test with padded and un-padded size,
+   * to ensure that extra available space doesn't yield a different result. */
+  for (int pass = 0; pass < 2; pass++) {
+    memset(unicode_dst_b, 0xff, sizeof(unicode_dst_b));
+    const size_t index_b = utf8_as_char32(
+        utf8_src_with_pad, pass ? Size : SizeWithPadding, unicode_dst_b);
+
+    /* Check the resulting content matches. */
+    EXPECT_EQ_ARRAY(unicode_dst_a, unicode_dst_b, Size);
+    /* Check the index of the source strings match. */
+    EXPECT_EQ(index_a, index_b);
+  }
+}
+
+template<size_t Size> void utf8_as_char32_test_compare(const char utf8_src[Size])
+{
+  /* Note that 7 is a little arbitrary,
+   * chosen since it's the maximum length of multi-byte character + 1
+   * to account for any errors that read past null bytes. */
+  utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 1>(utf8_src);
+  utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 7>(utf8_src);
+}
+
+template<size_t Size> void utf8_as_char32_test_at_buffer_size()
+{
+  char utf8_src[Size];
+
+  /* Test uniform bytes, also with offsets ascending & descending. */
+  for (int i = 0; i <= 0xff; i++) {
+    memset(utf8_src, i, sizeof(utf8_src));
+    utf8_as_char32_test_compare<Size>(utf8_src);
+
+    /* Offset trailing bytes up and down in steps of 1, 2, 4 .. etc. */
+    if (Size > 1) {
+      for (int mul = 1; mul < 256; mul *= 2) {
+        for (int ofs = 1; ofs < (int)Size; ofs++) {
+          utf8_src[ofs] = (char)(i + (ofs * mul));
+        }
+        utf8_as_char32_test_compare<Size>(utf8_src);
+
+        for (int ofs = 1; ofs < (int)Size; ofs++) {
+          utf8_src[ofs] = (char)(i - (ofs * mul));
+        }
+        utf8_as_char32_test_compare<Size>(utf8_src);
+      }
+    }
+  }
+
+  /* Random bytes. */
+  RNG *rng = BLI_rng_new(1);
+  for (int i = 0; i < 256; i++) {
+    BLI_rng_get_char_n(rng, utf8_src, sizeof(utf8_src));
+    utf8_as_char32_test_compare<Size>(utf8_src);
+  }
+  BLI_rng_free(rng);
+}
+
+TEST(string, Utf8AsUnicodeStep)
+{
+
+  /* Run tests at different buffer sizes. */
+  utf8_as_char32_test_at_buffer_size<1>();
+  utf8_as_char32_test_at_buffer_size<2>();
+  utf8_as_char32_test_at_buffer_size<3>();
+  utf8_as_char32_test_at_buffer_size<4>();
+  utf8_as_char32_test_at_buffer_size<5>();
+  utf8_as_char32_test_at_buffer_size<6>();
+  utf8_as_char32_test_at_buffer_size<7>();
+  utf8_as_char32_test_at_buffer_size<8>();
+  utf8_as_char32_test_at_buffer_size<9>();
+  utf8_as_char32_test_at_buffer_size<10>();
+  utf8_as_char32_test_at_buffer_size<11>();
+  utf8_as_char32_test_at_buffer_size<12>();
+}
+
+/** \} */
author	Campbell Barton <ideasman42@gmail.com>	2021-08-24 06:25:26 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2021-08-24 07:25:15 +0300
commit	8b55cda04812255922f488ad6bacd228d5d290a6 (patch)
tree	9d45f358d6cc1835c461763fdc0c83872854c331 /source/blender/blenlib/tests
parent	8371df8b1c12bdae574370d77819c46d8280f20f (diff)