BLI_string_utf8: remove unnecessary utf8 decoding functions

Remove BLI_str_utf8_as_unicode_and_size and BLI_str_utf8_as_unicode_and_size_safe. Use BLI_str_utf8_as_unicode_step_or_error instead, since it takes a buffer-bounds argument that prevents reading past the end of the buffer.
author: Campbell Barton <ideasman42@gmail.com> 2021-08-25 08:19:00 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2021-08-25 08:28:59 +0300
commit: 38630711a02e553f209ace9a8627a7a851820a2d (patch)
tree: c0eab96fbfc8b39ed274721c919ca39e2eedf17a /source/blender/blenlib/intern/string_utf8.c
parent: be906f44c6bb51eb492ecb90dbc1e8e0bc01d1ec (diff)
1 file changed, 14 insertions, 39 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index 06fd3168c24..7a01077bb44 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -546,40 +546,6 @@ uint BLI_str_utf8_as_unicode(const char *p)
   return result;
 }
 
-/* variant that increments the length */
-uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restrict index)
-{
-  int i, len;
-  uint mask = 0;
-  uint result;
-  const unsigned char c = (unsigned char)*p;
-
-  UTF8_COMPUTE(c, mask, len, -1);
-  if (UNLIKELY(len == -1)) {
-    return BLI_UTF8_ERR;
-  }
-  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
-  *index += (size_t)len;
-  return result;
-}
-
-uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index)
-{
-  int i, len;
-  uint mask = 0;
-  uint result;
-  const unsigned char c = (unsigned char)*p;
-
-  UTF8_COMPUTE(c, mask, len, -1);
-  if (UNLIKELY(len == -1)) {
-    *index += 1;
-    return c;
-  }
-  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
-  *index += (size_t)len;
-  return result;
-}
-
 /**
  * UTF8 decoding that steps over the index (unless an error is encountered).
  *
@@ -709,16 +675,23 @@ size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
   memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
 #endif
 
+  const size_t src_c_len = strlen(src_c);
+  const char *src_c_end = src_c + src_c_len;
+  size_t index = 0;
   while (*src_c && len != maxlen) {
-    size_t step = 0;
-    uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
+    const uint unicode = BLI_str_utf8_as_unicode_step_or_error(src_c, src_c_len, &index);
     if (unicode != BLI_UTF8_ERR) {
       *dst_w = unicode;
-      src_c += step;
     }
     else {
       *dst_w = '?';
-      src_c = BLI_str_find_next_char_utf8(src_c, NULL);
+      const char *src_c_next = BLI_str_find_next_char_utf8(src_c + index, src_c_end);
+      if (src_c_next != NULL) {
+        index = (size_t)(src_c_next - src_c);
+      }
+      else {
+        index += 1;
+      }
     }
     dst_w++;
     len++;
@@ -898,7 +871,9 @@ size_t BLI_str_partition_ex_utf8(const char *str,
       index = 0;
        *sep >= str && (!end || *sep < end) && **sep != '\0';
        *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) {
-    const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index);
+    size_t index_ofs = 0;
+    const uint c = BLI_str_utf8_as_unicode_step_or_error(*sep, (size_t)(end - *sep), &index_ofs);
+    index += index_ofs;
 
     if (c == BLI_UTF8_ERR) {
       *suf = *sep = NULL;
author	Campbell Barton <ideasman42@gmail.com>	2021-08-25 08:19:00 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2021-08-25 08:28:59 +0300
commit	38630711a02e553f209ace9a8627a7a851820a2d (patch)
tree	c0eab96fbfc8b39ed274721c919ca39e2eedf17a /source/blender/blenlib/intern/string_utf8.c
parent	be906f44c6bb51eb492ecb90dbc1e8e0bc01d1ec (diff)