From 12fdf9069abe3cd2250a9efec6e059eb85ec59d8 Mon Sep 17 00:00:00 2001 From: Harley Acheson Date: Tue, 27 Sep 2022 08:39:24 -0700 Subject: BLF: Editing Text with Combining Characters Corrections for caret insertion & movement and deletion for text strings that include non-precomposed diacritical marks (Unicode combining characters). See D15659 for more details and examples. Differential Revision: https://developer.blender.org/D15659 Reviewed by Campbell Barton --- source/blender/blenlib/intern/string_cursor_utf8.c | 59 +++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'source/blender/blenlib/intern') diff --git a/source/blender/blenlib/intern/string_cursor_utf8.c b/source/blender/blenlib/intern/string_cursor_utf8.c index 6b7151be969..2405b134428 100644 --- a/source/blender/blenlib/intern/string_cursor_utf8.c +++ b/source/blender/blenlib/intern/string_cursor_utf8.c @@ -96,27 +96,35 @@ static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8, return cursor_delim_type_unicode(uch); } +/* Keep in sync with BLI_str_cursor_step_next_utf32. */ bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos) { + if ((*pos) >= (int)maxlen) { + return false; + } const char *str_end = str + (maxlen + 1); const char *str_pos = str + (*pos); - const char *str_next = BLI_str_find_next_char_utf8(str_pos, str_end); - if (str_next != str_end) { - (*pos) += (str_next - str_pos); - if ((*pos) > (int)maxlen) { - (*pos) = (int)maxlen; - } - return true; + const char *str_next = str_pos; + do { + str_next = BLI_str_find_next_char_utf8(str_next, str_end); + } while (str_next < str_end && str_next[0] != 0 && BLI_str_utf8_char_width(str_next) < 1); + (*pos) += (str_next - str_pos); + if ((*pos) > (int)maxlen) { + (*pos) = (int)maxlen; } - return false; + return true; } -bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *pos) +/* Keep in sync with BLI_str_cursor_step_prev_utf32. */ +bool BLI_str_cursor_step_prev_utf8(const char *str, size_t maxlen, int *pos) { - if ((*pos) > 0) { + if ((*pos) > 0 && (*pos) <= maxlen) { const char *str_pos = str + (*pos); - const char *str_prev = BLI_str_find_prev_char_utf8(str_pos, str); + const char *str_prev = str_pos; + do { + str_prev = BLI_str_find_prev_char_utf8(str_prev, str); + } while (str_prev > str && BLI_str_utf8_char_width(str_prev) == 0); (*pos) -= (str_pos - str_prev); return true; } @@ -202,26 +210,29 @@ void BLI_str_cursor_step_utf8(const char *str, } } -/* UTF32 version of BLI_str_cursor_step_utf8 (keep in sync!) - * less complex since it doesn't need to do multi-byte stepping. - */ - -/* Helper functions so we can match #BLI_str_cursor_step_utf8. */ -static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos) +/* Keep in sync with BLI_str_cursor_step_next_utf8. */ +bool BLI_str_cursor_step_next_utf32(const char32_t *str, size_t maxlen, int *pos) { if ((*pos) >= (int)maxlen) { return false; } - (*pos)++; + do { + (*pos)++; + } while (*pos < (int)maxlen && str[*pos] != 0 && BLI_wcwidth(str[*pos]) == 0); + return true; } -static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos) +/* Keep in sync with BLI_str_cursor_step_prev_utf8. */ +bool BLI_str_cursor_step_prev_utf32(const char32_t *str, size_t UNUSED(maxlen), int *pos) { if ((*pos) <= 0) { return false; } - (*pos)--; + do { + (*pos)--; + } while (*pos > 0 && BLI_wcwidth(str[*pos]) == 0); + return true; } @@ -236,7 +247,7 @@ void BLI_str_cursor_step_utf32(const char32_t *str, if (direction == STRCUR_DIR_NEXT) { if (use_init_step) { - cursor_step_next_utf32(str, maxlen, pos); + BLI_str_cursor_step_next_utf32(str, maxlen, pos); } else { BLI_assert(jump == STRCUR_JUMP_DELIM); @@ -250,7 +261,7 @@ void BLI_str_cursor_step_utf32(const char32_t *str, * look at function cursor_delim_type_unicode() for complete * list of special character, ctr -> */ while ((*pos) < maxlen) { - if (cursor_step_next_utf32(str, maxlen, pos)) { + if (BLI_str_cursor_step_next_utf32(str, maxlen, pos)) { if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) { break; @@ -264,7 +275,7 @@ void BLI_str_cursor_step_utf32(const char32_t *str, } else if (direction == STRCUR_DIR_PREV) { if (use_init_step) { - cursor_step_prev_utf32(str, maxlen, pos); + BLI_str_cursor_step_prev_utf32(str, maxlen, pos); } else { BLI_assert(jump == STRCUR_JUMP_DELIM); @@ -279,7 +290,7 @@ void BLI_str_cursor_step_utf32(const char32_t *str, * list of special character, ctr -> */ while ((*pos) > 0) { const int pos_prev = *pos; - if (cursor_step_prev_utf32(str, maxlen, pos)) { + if (BLI_str_cursor_step_prev_utf32(str, maxlen, pos)) { if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) { /* left only: compensate for index/change in direction */ -- cgit v1.2.3