diff options
-rw-r--r-- | source/blender/blenfont/intern/blf_font.c | 24 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/text.c | 4 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_string_utf8.h | 6 | ||||
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 73 | ||||
-rw-r--r-- | source/blender/blenlib/tests/BLI_string_utf8_test.cc | 111 | ||||
-rw-r--r-- | source/blender/editors/space_text/text_ops.c | 7 |
6 files changed, 179 insertions, 46 deletions
diff --git a/source/blender/blenfont/intern/blf_font.c b/source/blender/blenfont/intern/blf_font.c index 50b4bb09b7b..5ad48aa08d4 100644 --- a/source/blender/blenfont/intern/blf_font.c +++ b/source/blender/blenfont/intern/blf_font.c @@ -298,7 +298,7 @@ static void blf_batch_draw_end(void) */ BLI_INLINE GlyphBLF *blf_utf8_next_fast( - FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t *i_p, uint *r_c) + FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t str_len, size_t *i_p, uint *r_c) { GlyphBLF *g; if ((*r_c = str[*i_p]) < GLYPH_ASCII_TABLE_SIZE) { @@ -309,7 +309,7 @@ BLI_INLINE GlyphBLF *blf_utf8_next_fast( } (*i_p)++; } - else if ((*r_c = BLI_str_utf8_as_unicode_step(str, i_p)) != BLI_UTF8_ERR) { + else if ((*r_c = BLI_str_utf8_as_unicode_step(str, str_len, i_p)) != BLI_UTF8_ERR) { g = blf_glyph_search(gc, *r_c); if (UNLIKELY(g == NULL)) { g = blf_glyph_add(font, gc, FT_Get_Char_Index(font->face, *r_c), *r_c); @@ -382,7 +382,7 @@ static void blf_font_draw_ex(FontBLF *font, blf_batch_draw_begin(font); while ((i < str_len) && str[i]) { - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -478,7 +478,7 @@ int blf_font_draw_mono(FontBLF *font, const char *str, const size_t str_len, int blf_batch_draw_begin(font); while ((i < str_len) && str[i]) { - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -535,7 +535,7 @@ static void blf_font_draw_buffer_ex(FontBLF *font, /* another buffer specific call for color conversion */ while ((i < str_len) && str[i]) { - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -703,7 +703,7 @@ size_t blf_font_width_to_strlen( for (i_prev = i = 0, width_new = pen_x = 0, g_prev = NULL, c_prev = 0; (i < str_len) && str[i]; i_prev = i, 
width_new = pen_x, c_prev = c, g_prev = g) { - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (blf_font_width_to_strlen_glyph_process(font, c_prev, c, g_prev, g, &pen_x, width_i)) { break; @@ -737,7 +737,7 @@ size_t blf_font_width_to_rstrlen( i_prev = (size_t)((s_prev != NULL) ? s_prev - str : 0); i_tmp = i; - g = blf_utf8_next_fast(font, gc, str, &i_tmp, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c); for (width_new = pen_x = 0; (s != NULL); i = i_prev, s = s_prev, c = c_prev, g = g_prev, g_prev = NULL, width_new = pen_x) { s_prev = BLI_str_find_prev_char_utf8(str, s); @@ -745,7 +745,7 @@ size_t blf_font_width_to_rstrlen( if (s_prev != NULL) { i_tmp = i_prev; - g_prev = blf_utf8_next_fast(font, gc, str, &i_tmp, &c_prev); + g_prev = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c_prev); BLI_assert(i_tmp == i); } @@ -788,7 +788,7 @@ static void blf_font_boundbox_ex(FontBLF *font, box->ymax = -32000.0f; while ((i < str_len) && str[i]) { - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -961,7 +961,7 @@ static void blf_font_boundbox_foreach_glyph_ex(FontBLF *font, while ((i < str_len) && str[i]) { i_curr = i; - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -1051,7 +1051,7 @@ static void blf_font_wrap_apply(FontBLF *font, size_t i_curr = i; bool do_draw = false; - g = blf_utf8_next_fast(font, gc, str, &i, &c); + g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c); if (UNLIKELY(c == BLI_UTF8_ERR)) { break; @@ -1202,7 +1202,7 @@ int blf_font_count_missing_chars(FontBLF *font, if ((c = str[i]) < GLYPH_ASCII_TABLE_SIZE) { i++; } - else if ((c = BLI_str_utf8_as_unicode_step(str, &i)) != BLI_UTF8_ERR) { + else if ((c = BLI_str_utf8_as_unicode_step(str, str_len, &i)) != BLI_UTF8_ERR) { 
if (FT_Get_Char_Index((font)->face, c) == 0) { missing++; } diff --git a/source/blender/blenkernel/intern/text.c b/source/blender/blenkernel/intern/text.c index 06137f5d110..c2ab91251b6 100644 --- a/source/blender/blenkernel/intern/text.c +++ b/source/blender/blenkernel/intern/text.c @@ -1660,7 +1660,7 @@ void txt_insert_buf(Text *text, const char *in_buffer) /* Read the first line (or as close as possible */ while (buffer[i] && buffer[i] != '\n') { - txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &i)); + txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &i)); } if (buffer[i] == '\n') { @@ -1682,7 +1682,7 @@ void txt_insert_buf(Text *text, const char *in_buffer) } else { for (j = i - l; j < i && j < len;) { - txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &j)); + txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &j)); } break; } diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h index e1d7e2c58f7..b936e39731d 100644 --- a/source/blender/blenlib/BLI_string_utf8.h +++ b/source/blender/blenlib/BLI_string_utf8.h @@ -43,8 +43,10 @@ unsigned int BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t * ATTR_NONNULL(); unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index) ATTR_NONNULL(); -unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index) - ATTR_NONNULL(); +unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, + size_t p_len, + size_t *__restrict index) ATTR_NONNULL(1, 3); + size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf); size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, const char *__restrict src_c, diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index 5710bd6b150..dbde5221d7e 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ 
b/source/blender/blenlib/intern/string_utf8.c @@ -582,49 +582,69 @@ uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__r /** * Another variant that steps over the index. + * + * \param p: The text to step over. + * \param p_len: The length of `p`. + * \param index: Index of `p` to step over. + * * \note currently this also falls back to latin1 for text drawing. + * + * \note The behavior for clipped text (where `p_len` limits decoding trailing bytes) + * must have the same behavior as encountering a nil byte, + * so functions that only use the first part of a string have matching behavior to functions + * that null terminate the text. */ -uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index) +uint BLI_str_utf8_as_unicode_step(const char *__restrict p, + const size_t p_len, + size_t *__restrict index) { int i, len; uint mask = 0; uint result; - unsigned char c; + const char c = p[*index]; - p += *index; - c = (unsigned char)*p; + BLI_assert(*index < p_len); + BLI_assert(c != '\0'); UTF8_COMPUTE(c, mask, len, -1); if (UNLIKELY(len == -1)) { - /* when called with NULL end, result will never be NULL, - * checks for a NULL character */ - const char *p_next = BLI_str_find_next_char_utf8(p, NULL); - /* will never return the same pointer unless '\0', - * eternal loop is prevented */ - *index += (size_t)(p_next - p); - return BLI_UTF8_ERR; + const char *p_next = BLI_str_find_next_char_utf8(p + *index, p + p_len); + /* #BLI_str_find_next_char_utf8 ensures the nil byte will terminate, + * so there is no chance this sets the index past the nil byte (assert this is the case). */ + BLI_assert(p_next || (memchr(p + *index, '\0', p_len - *index) == NULL)); + len = (int)((p_next ? 
(size_t)(p_next - p) : p_len) - *index); + result = BLI_UTF8_ERR; + } + else if (UNLIKELY(*index + (size_t)len > p_len)) { + /* A multi-byte character reads past the buffer bounds, + * match the behavior of encountering a byte with invalid encoding below. */ + len = 1; + result = (uint)c; } - - /* this is tricky since there are a few ways we can bail out of bad unicode - * values, 3 possible solutions. */ + else { + /* This is tricky since there are a few ways we can bail out of bad unicode + * values, 3 possible solutions. */ + p += *index; #if 0 - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); #elif 1 - /* WARNING: this is NOT part of glib, or supported by similar functions. - * this is added for text drawing because some filepaths can have latin1 - * characters */ - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); - if (result == BLI_UTF8_ERR) { - len = 1; - result = *p; - } - /* end warning! */ + /* WARNING: this is NOT part of glib, or supported by similar functions. + * this is added for text drawing because some filepaths can have latin1 + * characters */ + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + if (result == BLI_UTF8_ERR) { + len = 1; + result = (uint)c; + } + /* end warning! */ #else - /* without a fallback like '?', text drawing will stop on this value */ - UTF8_GET(result, p, i, mask, len, '?'); + /* Without a fallback like '?', text drawing will stop on this value. 
*/ + UTF8_GET(result, p, i, mask, len, '?'); #endif + } *index += (size_t)len; + BLI_assert(*index <= p_len); return result; } @@ -810,6 +830,7 @@ char *BLI_str_find_next_char_utf8(const char *p, const char *end) { if (*p) { if (end) { + BLI_assert(end >= p); for (++p; p < end && (*p & 0xc0) == 0x80; p++) { /* do nothing */ } diff --git a/source/blender/blenlib/tests/BLI_string_utf8_test.cc b/source/blender/blenlib/tests/BLI_string_utf8_test.cc index 13f5cb7f284..b25f2310e1e 100644 --- a/source/blender/blenlib/tests/BLI_string_utf8_test.cc +++ b/source/blender/blenlib/tests/BLI_string_utf8_test.cc @@ -2,6 +2,7 @@ #include "testing/testing.h" +#include "BLI_rand.h" #include "BLI_string.h" #include "BLI_string_utf8.h" #include "BLI_utildefines.h" @@ -11,7 +12,8 @@ * quite their share of lines, they deserved their own file. */ /* -------------------------------------------------------------------- */ -/* tests */ +/** \name Test #BLI_str_utf8_invalid_strip + * \{ */ /* Breaking strings is confusing here, prefer over-long lines. 
*/ /* clang-format off */ @@ -284,3 +286,110 @@ TEST(string, Utf8InvalidBytes) EXPECT_STREQ(buff, tst_stripped); } } + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Test #BLI_str_utf8_as_unicode_step + * \{ */ + +static size_t utf8_as_char32(const char *str, const char str_len, char32_t *r_result) +{ + size_t i = 0, result_len = 0; + while ((i < str_len) && (str[i] != '\0')) { + char32_t c = BLI_str_utf8_as_unicode_step(str, str_len, &i); + if (c != BLI_UTF8_ERR) { + r_result[result_len++] = c; + } + } + return i; +} + +template<size_t Size, size_t SizeWithPadding> +void utf8_as_char32_test_compare_with_pad_bytes(const char utf8_src[Size]) +{ + char utf8_src_with_pad[SizeWithPadding] = {0}; + + memcpy(utf8_src_with_pad, utf8_src, Size); + + char32_t unicode_dst_a[Size], unicode_dst_b[Size]; + + memset(unicode_dst_a, 0xff, sizeof(unicode_dst_a)); + const size_t index_a = utf8_as_char32(utf8_src, Size, unicode_dst_a); + + /* Test with padded and un-padded size, + * to ensure that extra available space doesn't yield a different result. */ + for (int pass = 0; pass < 2; pass++) { + memset(unicode_dst_b, 0xff, sizeof(unicode_dst_b)); + const size_t index_b = utf8_as_char32( + utf8_src_with_pad, pass ? Size : SizeWithPadding, unicode_dst_b); + + /* Check the resulting content matches. */ + EXPECT_EQ_ARRAY(unicode_dst_a, unicode_dst_b, Size); + /* Check the index of the source strings match. */ + EXPECT_EQ(index_a, index_b); + } +} + +template<size_t Size> void utf8_as_char32_test_compare(const char utf8_src[Size]) +{ + /* Note that 7 is a little arbitrary, + * chosen since it's the maximum length of multi-byte character + 1 + * to account for any errors that read past null bytes. 
*/ + utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 1>(utf8_src); + utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 7>(utf8_src); +} + +template<size_t Size> void utf8_as_char32_test_at_buffer_size() +{ + char utf8_src[Size]; + + /* Test uniform bytes, also with offsets ascending & descending. */ + for (int i = 0; i <= 0xff; i++) { + memset(utf8_src, i, sizeof(utf8_src)); + utf8_as_char32_test_compare<Size>(utf8_src); + + /* Offset trailing bytes up and down in steps of 1, 2, 4 .. etc. */ + if (Size > 1) { + for (int mul = 1; mul < 256; mul *= 2) { + for (int ofs = 1; ofs < (int)Size; ofs++) { + utf8_src[ofs] = (char)(i + (ofs * mul)); + } + utf8_as_char32_test_compare<Size>(utf8_src); + + for (int ofs = 1; ofs < (int)Size; ofs++) { + utf8_src[ofs] = (char)(i - (ofs * mul)); + } + utf8_as_char32_test_compare<Size>(utf8_src); + } + } + } + + /* Random bytes. */ + RNG *rng = BLI_rng_new(1); + for (int i = 0; i < 256; i++) { + BLI_rng_get_char_n(rng, utf8_src, sizeof(utf8_src)); + utf8_as_char32_test_compare<Size>(utf8_src); + } + BLI_rng_free(rng); +} + +TEST(string, Utf8AsUnicodeStep) +{ + + /* Run tests at different buffer sizes. 
*/ + utf8_as_char32_test_at_buffer_size<1>(); + utf8_as_char32_test_at_buffer_size<2>(); + utf8_as_char32_test_at_buffer_size<3>(); + utf8_as_char32_test_at_buffer_size<4>(); + utf8_as_char32_test_at_buffer_size<5>(); + utf8_as_char32_test_at_buffer_size<6>(); + utf8_as_char32_test_at_buffer_size<7>(); + utf8_as_char32_test_at_buffer_size<8>(); + utf8_as_char32_test_at_buffer_size<9>(); + utf8_as_char32_test_at_buffer_size<10>(); + utf8_as_char32_test_at_buffer_size<11>(); + utf8_as_char32_test_at_buffer_size<12>(); +} + +/** \} */ diff --git a/source/blender/editors/space_text/text_ops.c b/source/blender/editors/space_text/text_ops.c index b5fcadbefe8..f480f60a2b9 100644 --- a/source/blender/editors/space_text/text_ops.c +++ b/source/blender/editors/space_text/text_ops.c @@ -3424,25 +3424,26 @@ static int text_insert_exec(bContext *C, wmOperator *op) SpaceText *st = CTX_wm_space_text(C); Text *text = CTX_data_edit_text(C); char *str; + int str_len; bool done = false; size_t i = 0; uint code; text_drawcache_tag_update(st, 0); - str = RNA_string_get_alloc(op->ptr, "text", NULL, 0, NULL); + str = RNA_string_get_alloc(op->ptr, "text", NULL, 0, &str_len); ED_text_undo_push_init(C); if (st && st->overwrite) { while (str[i]) { - code = BLI_str_utf8_as_unicode_step(str, &i); + code = BLI_str_utf8_as_unicode_step(str, str_len, &i); done |= txt_replace_char(text, code); } } else { while (str[i]) { - code = BLI_str_utf8_as_unicode_step(str, &i); + code = BLI_str_utf8_as_unicode_step(str, str_len, &i); done |= txt_add_char(text, code); } } |