diff options
author | mano-wii <germano.costa@ig.com.br> | 2019-11-22 18:26:54 +0300 |
---|---|---|
committer | mano-wii <germano.costa@ig.com.br> | 2019-11-22 18:27:34 +0300 |
commit | 177dfc6384b926dd19e3b7e98a995ccb4da9167c (patch) | |
tree | 865abe9f707bee039a506bfc17cb4cff218bd6c6 /source/blender/blenlib | |
parent | 1304cee920c5f01fd9f0474ea782db61ac031403 (diff) |
Fix T71273: Bad encoding of utf-8 for Text objects
`BLI_strncpy_wchar_from_utf8` internally assumes `wchar_t` is 32 bits,
which is not the case on Windows (where `wchar_t` is 16 bits).
The solution is to replace `wchar_t` with `char32_t`.
Thanks to @robbott for compatibility on macOS.
Differential Revision: https://developer.blender.org/D6198
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r-- | source/blender/blenlib/BLI_string_cursor_utf8.h | 2 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_string_utf8.h | 12 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_sys_types.h | 9 | ||||
-rw-r--r-- | source/blender/blenlib/intern/string_cursor_utf8.c | 16 | ||||
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 95 |
5 files changed, 119 insertions, 15 deletions
diff --git a/source/blender/blenlib/BLI_string_cursor_utf8.h b/source/blender/blenlib/BLI_string_cursor_utf8.h index 2d0acabc9de..a54089ad8d6 100644 --- a/source/blender/blenlib/BLI_string_cursor_utf8.h +++ b/source/blender/blenlib/BLI_string_cursor_utf8.h @@ -45,7 +45,7 @@ void BLI_str_cursor_step_utf8(const char *str, eStrCursorJumpType jump, bool use_init_step); -void BLI_str_cursor_step_wchar(const wchar_t *str, +void BLI_str_cursor_step_utf32(const char32_t *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h index 0cdd6e94610..1db4cdfecd6 100644 --- a/source/blender/blenlib/BLI_string_utf8.h +++ b/source/blender/blenlib/BLI_string_utf8.h @@ -26,6 +26,7 @@ extern "C" { #endif #include "BLI_compiler_attrs.h" +#include "BLI_sys_types.h" char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy) ATTR_NONNULL(); @@ -48,6 +49,13 @@ unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index) ATTR_NONNULL(); size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf); +size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, + const char *__restrict src_c, + const size_t maxncpy) ATTR_NONNULL(); +size_t BLI_str_utf32_as_utf8(char *__restrict dst, + const char32_t *__restrict src, + const size_t maxncpy) ATTR_NONNULL(); +size_t BLI_str_utf32_as_utf8_len(const char32_t *src) ATTR_NONNULL(); char *BLI_str_find_prev_char_utf8(const char *str, const char *p) ATTR_NONNULL(); char *BLI_str_find_next_char_utf8(const char *p, const char *end) ATTR_NONNULL(1); @@ -68,8 +76,8 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst, const size_t maxcpy) ATTR_NONNULL(); /* count columns that character/string occupies, based on wcwidth.c */ -int BLI_wcwidth(wchar_t ucs); -int BLI_wcswidth(const wchar_t *pwcs, size_t n) 
ATTR_NONNULL(); +int BLI_wcwidth(char32_t ucs); +int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_NONNULL(); /* warning, can return -1 on bad chars */ int BLI_str_utf8_char_width(const char *p) ATTR_NONNULL(); int BLI_str_utf8_char_width_safe(const char *p) ATTR_NONNULL(); diff --git a/source/blender/blenlib/BLI_sys_types.h b/source/blender/blenlib/BLI_sys_types.h index a82e6a562e0..354d27385a2 100644 --- a/source/blender/blenlib/BLI_sys_types.h +++ b/source/blender/blenlib/BLI_sys_types.h @@ -72,6 +72,15 @@ typedef uint64_t u_int64_t; #include <stddef.h> /* size_t define */ #include <stdbool.h> +#ifndef __cplusplus +# if defined(__APPLE__) +/* The <uchar.h> standard header is missing on macOS. */ +typedef unsigned int char32_t; +# else +# include <uchar.h> +# endif +#endif + typedef unsigned int uint; typedef unsigned short ushort; typedef unsigned long ulong; diff --git a/source/blender/blenlib/intern/string_cursor_utf8.c b/source/blender/blenlib/intern/string_cursor_utf8.c index f0113a7028a..ee4c11b1c04 100644 --- a/source/blender/blenlib/intern/string_cursor_utf8.c +++ b/source/blender/blenlib/intern/string_cursor_utf8.c @@ -211,12 +211,12 @@ void BLI_str_cursor_step_utf8(const char *str, } } -/* wchar_t version of BLI_str_cursor_step_utf8 (keep in sync!) +/* UTF32 version of BLI_str_cursor_step_utf8 (keep in sync!) * less complex since it doesn't need to do multi-byte stepping. 
*/ /* helper funcs so we can match BLI_str_cursor_step_utf8 */ -static bool wchar_t_step_next(const wchar_t *UNUSED(str), size_t maxlen, int *pos) +static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos) { if ((*pos) >= (int)maxlen) { return false; @@ -225,7 +225,7 @@ static bool wchar_t_step_next(const wchar_t *UNUSED(str), size_t maxlen, int *po return true; } -static bool wchar_t_step_prev(const wchar_t *UNUSED(str), size_t UNUSED(maxlen), int *pos) +static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos) { if ((*pos) <= 0) { return false; @@ -234,7 +234,7 @@ static bool wchar_t_step_prev(const wchar_t *UNUSED(str), size_t UNUSED(maxlen), return true; } -void BLI_str_cursor_step_wchar(const wchar_t *str, +void BLI_str_cursor_step_utf32(const char32_t *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, @@ -245,7 +245,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str, if (direction == STRCUR_DIR_NEXT) { if (use_init_step) { - wchar_t_step_next(str, maxlen, pos); + cursor_step_next_utf32(str, maxlen, pos); } else { BLI_assert(jump == STRCUR_JUMP_DELIM); @@ -259,7 +259,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str, * look at function cursor_delim_type_unicode() for complete * list of special character, ctr -> */ while ((*pos) < maxlen) { - if (wchar_t_step_next(str, maxlen, pos)) { + if (cursor_step_next_utf32(str, maxlen, pos)) { if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) { break; @@ -273,7 +273,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str, } else if (direction == STRCUR_DIR_PREV) { if (use_init_step) { - wchar_t_step_prev(str, maxlen, pos); + cursor_step_prev_utf32(str, maxlen, pos); } else { BLI_assert(jump == STRCUR_JUMP_DELIM); @@ -288,7 +288,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str, * list of special character, ctr -> */ while ((*pos) > 0) { const int pos_prev = *pos; - if 
(wchar_t_step_prev(str, maxlen, pos)) { + if (cursor_step_prev_utf32(str, maxlen, pos)) { if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) { /* left only: compensate for index/change in direction */ diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index 92c4ec73768..63657f33bba 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ b/source/blender/blenlib/intern/string_utf8.c @@ -430,6 +430,11 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, size_t step = 0; uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step); if (unicode != BLI_UTF8_ERR) { + /* TODO: `wchar_t` type is an implementation-defined and may represent + * 16-bit or 32-bit depending on operating system. + * So the ideal would be to do the corresponding encoding. + * But for now just assert that it has no conflicting use. */ + BLI_assert(step <= sizeof(wchar_t)); *dst_w = (wchar_t)unicode; src_c += step; } @@ -451,12 +456,12 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, /* count columns that character/string occupies, based on wcwidth.c */ -int BLI_wcwidth(wchar_t ucs) +int BLI_wcwidth(char32_t ucs) { return mk_wcwidth(ucs); } -int BLI_wcswidth(const wchar_t *pwcs, size_t n) +int BLI_wcswidth(const char32_t *pwcs, size_t n) { return mk_wcswidth(pwcs, n); } @@ -468,7 +473,7 @@ int BLI_str_utf8_char_width(const char *p) return -1; } - return BLI_wcwidth((wchar_t)unicode); + return BLI_wcwidth((char32_t)unicode); } int BLI_str_utf8_char_width_safe(const char *p) @@ -480,7 +485,7 @@ int BLI_str_utf8_char_width_safe(const char *p) return 1; } - columns = BLI_wcwidth((wchar_t)unicode); + columns = BLI_wcwidth((char32_t)unicode); return (columns < 0) ? 
1 : columns; } @@ -726,6 +731,88 @@ size_t BLI_str_utf8_from_unicode(uint c, char *outbuf) return len; } +size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, + const char *__restrict src_c, + const size_t maxncpy) +{ + const size_t maxlen = maxncpy - 1; + size_t len = 0; + + BLI_assert(maxncpy != 0); + +#ifdef DEBUG_STRSIZE + memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy); +#endif + + while (*src_c && len != maxlen) { + size_t step = 0; + uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step); + if (unicode != BLI_UTF8_ERR) { + *dst_w = unicode; + src_c += step; + } + else { + *dst_w = '?'; + src_c = BLI_str_find_next_char_utf8(src_c, NULL); + } + dst_w++; + len++; + } + + *dst_w = 0; + + return len; +} + +size_t BLI_str_utf32_as_utf8(char *__restrict dst, + const char32_t *__restrict src, + const size_t maxncpy) +{ + const size_t maxlen = maxncpy - 1; + /* 6 is max utf8 length of an unicode char. */ + const int64_t maxlen_secured = (int64_t)maxlen - 6; + size_t len = 0; + + BLI_assert(maxncpy != 0); + +#ifdef DEBUG_STRSIZE + memset(dst, 0xff, sizeof(*dst) * maxncpy); +#endif + + while (*src && len <= maxlen_secured) { + len += BLI_str_utf8_from_unicode((uint)*src++, dst + len); + } + + /* We have to be more careful for the last six bytes, + * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */ + while (*src) { + char t[6]; + size_t l = BLI_str_utf8_from_unicode((uint)*src++, t); + BLI_assert(l <= 6); + if (len + l > maxlen) { + break; + } + memcpy(dst + len, t, l); + len += l; + } + + dst[len] = '\0'; + + return len; +} + +/* utf32 len in utf8 */ +size_t BLI_str_utf32_as_utf8_len(const char32_t *src) +{ + size_t len = 0; + + while (*src) { + len += BLI_str_utf8_from_unicode((uint)*src++, NULL); + } + + return len; +} + /* was g_utf8_find_prev_char */ /** * BLI_str_find_prev_char_utf8: |