diff options
author | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:17:24 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:21:24 +0300 |
commit | e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch) | |
tree | 8cf3453d12edb177a218ef8009357518ec6cab6a /source/blender/blenlib/intern/string_utf8.c | |
parent | b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff) |
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211.
For details on usage and instructions for migrating branches
without conflicts, see:
https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'source/blender/blenlib/intern/string_utf8.c')
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 1010 |
1 files changed, 533 insertions, 477 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index 0c07d1c361a..01412416854 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ b/source/blender/blenlib/intern/string_utf8.c @@ -32,7 +32,7 @@ #include "BLI_utildefines.h" -#include "BLI_string_utf8.h" /* own include */ +#include "BLI_string_utf8.h" /* own include */ #ifdef __GNUC__ # pragma GCC diagnostic error "-Wsign-conversion" @@ -44,14 +44,14 @@ /* Note: last two values (0xfe and 0xff) are forbidden in utf-8, * so they are considered 1 byte length too. */ static const size_t utf8_skip_data[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, }; /* from libswish3, originally called u8_isvalid(), @@ -70,97 +70,127 @@ static const size_t utf8_skip_data[256] = { */ ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length) { - const unsigned char *p, *perr, *pend = (const unsigned char *)str + length; - unsigned char c; - int ab; - - for (p = (const unsigned char *)str; p < pend; p++, length--) { - c = *p; - perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */ - if (ELEM(c, 0xfe, 0xff, 0x00)) { - /* Those three values are not allowed in utf8 string. */ - goto utf8_error; - } - if (c < 128) { - continue; - } - if ((c & 0xc0) != 0xc0) { - goto utf8_error; - } - - /* Note that since we always increase p (and decrease length) by one byte in main loop, - * we only add/subtract extra utf8 bytes in code below - * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */ - ab = (int)utf8_skip_data[c] - 1; - if (length <= ab) { - goto utf8_error; - } - - /* Check top bits in the second byte */ - p++; - length--; - if ((*p & 0xc0) != 0x80) { - goto utf8_error; - } - - /* Check for overlong sequences for each different length */ - switch (ab) { - case 1: - /* Check for xx00 000x */ - if ((c & 0x3e) == 0) { goto utf8_error; } - continue; /* We know there aren't any more bytes to check */ - - case 2: - /* Check for 1110 0000, xx0x xxxx */ - if (c == 0xe0 && (*p & 0x20) == 0) { goto utf8_error; } - /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn - * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */ - /* From section 5.1 (and 5.2) */ - if (c == 0xed) { - if (*p == 0xa0 && *(p + 1) == 0x80) { goto utf8_error; } - if (*p == 0xad && *(p + 1) == 0xbf) { goto utf8_error; } - if (*p == 0xae && *(p + 1) == 0x80) { goto utf8_error; } - if (*p == 0xaf && *(p + 1) == 0xbf) { goto utf8_error; } - if (*p == 0xb0 && *(p + 1) == 0x80) { goto utf8_error; } - if (*p == 0xbe && *(p + 1) == 0x80) { goto utf8_error; } - if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; } - } - /* From section 5.3 */ - if (c == 0xef) { - if (*p == 0xbf && *(p + 1) == 0xbe) { goto utf8_error; } - if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; } - } - break; - - case 3: - /* Check for 1111 0000, xx00 xxxx */ - if (c == 0xf0 && (*p & 0x30) == 0) { goto utf8_error; } - break; - - case 4: - /* Check for 1111 1000, xx00 0xxx */ - if (c == 0xf8 && (*p & 0x38) == 0) { goto utf8_error; } - break; - - case 5: - /* Check for 1111 1100, xx00 00xx */ - if (c == 0xfc && (*p & 0x3c) == 0) { goto utf8_error; } - break; - } - - /* Check for valid bytes after the 2nd, if any; all must start 10 */ - while (--ab > 0) { - p++; - length--; - if ((*p & 0xc0) != 0x80) { goto utf8_error; } - } - } - - return -1; + const unsigned char *p, *perr, *pend = (const unsigned char *)str + length; + unsigned char c; + int ab; + + for (p = (const unsigned char *)str; p < pend; p++, length--) { + c = *p; + perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */ + if (ELEM(c, 0xfe, 0xff, 0x00)) { + /* Those three values are not allowed in utf8 string. */ + goto utf8_error; + } + if (c < 128) { + continue; + } + if ((c & 0xc0) != 0xc0) { + goto utf8_error; + } + + /* Note that since we always increase p (and decrease length) by one byte in main loop, + * we only add/subtract extra utf8 bytes in code below + * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */ + ab = (int)utf8_skip_data[c] - 1; + if (length <= ab) { + goto utf8_error; + } + + /* Check top bits in the second byte */ + p++; + length--; + if ((*p & 0xc0) != 0x80) { + goto utf8_error; + } + + /* Check for overlong sequences for each different length */ + switch (ab) { + case 1: + /* Check for xx00 000x */ + if ((c & 0x3e) == 0) { + goto utf8_error; + } + continue; /* We know there aren't any more bytes to check */ + + case 2: + /* Check for 1110 0000, xx0x xxxx */ + if (c == 0xe0 && (*p & 0x20) == 0) { + goto utf8_error; + } + /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn + * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */ + /* From section 5.1 (and 5.2) */ + if (c == 0xed) { + if (*p == 0xa0 && *(p + 1) == 0x80) { + goto utf8_error; + } + if (*p == 0xad && *(p + 1) == 0xbf) { + goto utf8_error; + } + if (*p == 0xae && *(p + 1) == 0x80) { + goto utf8_error; + } + if (*p == 0xaf && *(p + 1) == 0xbf) { + goto utf8_error; + } + if (*p == 0xb0 && *(p + 1) == 0x80) { + goto utf8_error; + } + if (*p == 0xbe && *(p + 1) == 0x80) { + goto utf8_error; + } + if (*p == 0xbf && *(p + 1) == 0xbf) { + goto utf8_error; + } + } + /* From section 5.3 */ + if (c == 0xef) { + if (*p == 0xbf && *(p + 1) == 0xbe) { + goto utf8_error; + } + if (*p == 0xbf && *(p + 1) == 0xbf) { + goto utf8_error; + } + } + break; + + case 3: + /* Check for 1111 0000, xx00 xxxx */ + if (c == 0xf0 && (*p & 0x30) == 0) { + goto utf8_error; + } + break; + + case 4: + /* Check for 1111 1000, xx00 0xxx */ + if (c == 0xf8 && (*p & 0x38) == 0) { + goto utf8_error; + } + break; + + case 5: + /* Check for 1111 1100, xx00 00xx */ + if (c == 0xfc && (*p & 0x3c) == 0) { + goto utf8_error; + } + break; + } + + /* Check for valid bytes after the 2nd, if any; all must start 10 */ + while (--ab > 0) { + p++; + length--; + if ((*p & 0xc0) != 0x80) { + goto utf8_error; + } + } + } + + return -1; utf8_error: - return ((const char *)perr - (const char *)str); + return ((const char *)perr - (const char *)str); } /** @@ -170,97 +200,108 @@ utf8_error: */ int BLI_utf8_invalid_strip(char *str, size_t length) { - ptrdiff_t bad_char; - int tot = 0; - - BLI_assert(str[length] == '\0'); - - while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) { - str += bad_char; - length -= (size_t)(bad_char + 1); - - if (length == 0) { - /* last character bad, strip it */ - *str = '\0'; - tot++; - break; - } - else { - /* strip, keep looking */ - memmove(str, str + 1, length + 1); /* +1 for NULL char! */ - tot++; - } - } - - return tot; + ptrdiff_t bad_char; + int tot = 0; + + BLI_assert(str[length] == '\0'); + + while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) { + str += bad_char; + length -= (size_t)(bad_char + 1); + + if (length == 0) { + /* last character bad, strip it */ + *str = '\0'; + tot++; + break; + } + else { + /* strip, keep looking */ + memmove(str, str + 1, length + 1); /* +1 for NULL char! */ + tot++; + } + } + + return tot; } - /* compatible with BLI_strncpy, but esnure no partial utf8 chars */ -#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \ - { \ - size_t utf8_size; \ - while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) {\ - maxncpy -= utf8_size; \ - switch (utf8_size) { \ - case 6: *dst ++ = *src ++; ATTR_FALLTHROUGH; \ - case 5: *dst ++ = *src ++; ATTR_FALLTHROUGH; \ - case 4: *dst ++ = *src ++; ATTR_FALLTHROUGH; \ - case 3: *dst ++ = *src ++; ATTR_FALLTHROUGH; \ - case 2: *dst ++ = *src ++; ATTR_FALLTHROUGH; \ - case 1: *dst ++ = *src ++; \ - } \ - } \ - *dst = '\0'; \ - } (void)0 +#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \ + { \ + size_t utf8_size; \ + while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) { \ + maxncpy -= utf8_size; \ + switch (utf8_size) { \ + case 6: \ + *dst++ = *src++; \ + ATTR_FALLTHROUGH; \ + case 5: \ + *dst++ = *src++; \ + ATTR_FALLTHROUGH; \ + case 4: \ + *dst++ = *src++; \ + ATTR_FALLTHROUGH; \ + case 3: \ + *dst++ = *src++; \ + ATTR_FALLTHROUGH; \ + case 2: \ + *dst++ = *src++; \ + ATTR_FALLTHROUGH; \ + case 1: \ + *dst++ = *src++; \ + } \ + } \ + *dst = '\0'; \ + } \ + (void)0 char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy) { - char *r_dst = dst; + char *r_dst = dst; - BLI_assert(maxncpy != 0); + BLI_assert(maxncpy != 0); #ifdef DEBUG_STRSIZE - memset(dst, 0xff, sizeof(*dst) * maxncpy); + memset(dst, 0xff, sizeof(*dst) * maxncpy); #endif - /* note: currently we don't attempt to deal with invalid utf8 chars */ - BLI_STR_UTF8_CPY(dst, src, maxncpy); + /* note: currently we don't attempt to deal with invalid utf8 chars */ + BLI_STR_UTF8_CPY(dst, src, maxncpy); - return r_dst; + return r_dst; } size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy) { - char *r_dst = dst; + char *r_dst = dst; - BLI_assert(maxncpy != 0); + BLI_assert(maxncpy != 0); #ifdef DEBUG_STRSIZE - memset(dst, 0xff, sizeof(*dst) * maxncpy); + memset(dst, 0xff, sizeof(*dst) * maxncpy); #endif - /* note: currently we don't attempt to deal with invalid utf8 chars */ - BLI_STR_UTF8_CPY(dst, src, maxncpy); + /* note: currently we don't attempt to deal with invalid utf8 chars */ + BLI_STR_UTF8_CPY(dst, src, maxncpy); - return (size_t)(dst - r_dst); + return (size_t)(dst - r_dst); } char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy) { - while (*dst && maxncpy > 0) { - dst++; - maxncpy--; - } + while (*dst && maxncpy > 0) { + dst++; + maxncpy--; + } #ifdef DEBUG_STRSIZE - memset(dst, 0xff, sizeof(*dst) * maxncpy); + memset(dst, 0xff, sizeof(*dst) * maxncpy); #endif - BLI_STR_UTF8_CPY(dst, src, maxncpy); + BLI_STR_UTF8_CPY(dst, src, maxncpy); - return dst; + return dst; } #undef BLI_STR_UTF8_CPY @@ -268,89 +309,91 @@ char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t /* --------------------------------------------------------------------------*/ /* wchar_t / utf8 functions */ -size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy) +size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, + const wchar_t *__restrict src, + const size_t maxncpy) { - const size_t maxlen = maxncpy - 1; - /* 6 is max utf8 length of an unicode char. */ - const int64_t maxlen_secured = (int64_t)maxlen - 6; - size_t len = 0; + const size_t maxlen = maxncpy - 1; + /* 6 is max utf8 length of an unicode char. */ + const int64_t maxlen_secured = (int64_t)maxlen - 6; + size_t len = 0; - BLI_assert(maxncpy != 0); + BLI_assert(maxncpy != 0); #ifdef DEBUG_STRSIZE - memset(dst, 0xff, sizeof(*dst) * maxncpy); + memset(dst, 0xff, sizeof(*dst) * maxncpy); #endif - while (*src && len <= maxlen_secured) { - len += BLI_str_utf8_from_unicode((uint)*src++, dst + len); - } - - /* We have to be more careful for the last six bytes, - * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */ - while (*src) { - char t[6]; - size_t l = BLI_str_utf8_from_unicode((uint)*src++, t); - BLI_assert(l <= 6); - if (len + l > maxlen) { - break; - } - memcpy(dst + len, t, l); - len += l; - } - - dst[len] = '\0'; - - return len; + while (*src && len <= maxlen_secured) { + len += BLI_str_utf8_from_unicode((uint)*src++, dst + len); + } + + /* We have to be more careful for the last six bytes, + * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */ + while (*src) { + char t[6]; + size_t l = BLI_str_utf8_from_unicode((uint)*src++, t); + BLI_assert(l <= 6); + if (len + l > maxlen) { + break; + } + memcpy(dst + len, t, l); + len += l; + } + + dst[len] = '\0'; + + return len; } /* wchar len in utf8 */ size_t BLI_wstrlen_utf8(const wchar_t *src) { - size_t len = 0; + size_t len = 0; - while (*src) { - len += BLI_str_utf8_from_unicode((uint)*src++, NULL); - } + while (*src) { + len += BLI_str_utf8_from_unicode((uint)*src++, NULL); + } - return len; + return len; } size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes) { - size_t len; - const char *strc_orig = strc; + size_t len; + const char *strc_orig = strc; - for (len = 0; *strc; len++) { - strc += BLI_str_utf8_size_safe(strc); - } + for (len = 0; *strc; len++) { + strc += BLI_str_utf8_size_safe(strc); + } - *r_len_bytes = (size_t)(strc - strc_orig); - return len; + *r_len_bytes = (size_t)(strc - strc_orig); + return len; } size_t BLI_strlen_utf8(const char *strc) { - size_t len; + size_t len; - for (len = 0; *strc; len++) { - strc += BLI_str_utf8_size_safe(strc); - } + for (len = 0; *strc; len++) { + strc += BLI_str_utf8_size_safe(strc); + } - return len; + return len; } size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes) { - size_t len; - const char *strc_orig = strc; - const char *strc_end = strc + maxlen; + size_t len; + const char *strc_orig = strc; + const char *strc_end = strc + maxlen; - for (len = 0; *strc && strc < strc_end; len++) { - strc += BLI_str_utf8_size_safe(strc); - } + for (len = 0; *strc && strc < strc_end; len++) { + strc += BLI_str_utf8_size_safe(strc); + } - *r_len_bytes = (size_t)(strc - strc_orig); - return len; + *r_len_bytes = (size_t)(strc - strc_orig); + return len; } /** @@ -360,45 +403,47 @@ size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_ */ size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen) { - size_t len; - const char *strc_end = strc + maxlen; + size_t len; + const char *strc_end = strc + maxlen; - for (len = 0; *strc && strc < strc_end; len++) { - strc += BLI_str_utf8_size_safe(strc); - } + for (len = 0; *strc && strc < strc_end; len++) { + strc += BLI_str_utf8_size_safe(strc); + } - return len; + return len; } -size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy) +size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, + const char *__restrict src_c, + const size_t maxncpy) { - const size_t maxlen = maxncpy - 1; - size_t len = 0; + const size_t maxlen = maxncpy - 1; + size_t len = 0; - BLI_assert(maxncpy != 0); + BLI_assert(maxncpy != 0); #ifdef DEBUG_STRSIZE - memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy); + memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy); #endif - while (*src_c && len != maxlen) { - size_t step = 0; - uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step); - if (unicode != BLI_UTF8_ERR) { - *dst_w = (wchar_t)unicode; - src_c += step; - } - else { - *dst_w = '?'; - src_c = BLI_str_find_next_char_utf8(src_c, NULL); - } - dst_w++; - len++; - } - - *dst_w = 0; - - return len; + while (*src_c && len != maxlen) { + size_t step = 0; + uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step); + if (unicode != BLI_UTF8_ERR) { + *dst_w = (wchar_t)unicode; + src_c += step; + } + else { + *dst_w = '?'; + src_c = BLI_str_find_next_char_utf8(src_c, NULL); + } + dst_w++; + len++; + } + + *dst_w = 0; + + return len; } /* end wchar_t / utf8 functions */ @@ -408,36 +453,36 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__rest int BLI_wcwidth(wchar_t ucs) { - return mk_wcwidth(ucs); + return mk_wcwidth(ucs); } int BLI_wcswidth(const wchar_t *pwcs, size_t n) { - return mk_wcswidth(pwcs, n); + return mk_wcswidth(pwcs, n); } int BLI_str_utf8_char_width(const char *p) { - uint unicode = BLI_str_utf8_as_unicode(p); - if (unicode == BLI_UTF8_ERR) { - return -1; - } + uint unicode = BLI_str_utf8_as_unicode(p); + if (unicode == BLI_UTF8_ERR) { + return -1; + } - return BLI_wcwidth((wchar_t)unicode); + return BLI_wcwidth((wchar_t)unicode); } int BLI_str_utf8_char_width_safe(const char *p) { - int columns; + int columns; - uint unicode = BLI_str_utf8_as_unicode(p); - if (unicode == BLI_UTF8_ERR) { - return 1; - } + uint unicode = BLI_str_utf8_as_unicode(p); + if (unicode == BLI_UTF8_ERR) { + return 1; + } - columns = BLI_wcwidth((wchar_t)unicode); + columns = BLI_wcwidth((wchar_t)unicode); - return (columns < 0) ? 1 : columns; + return (columns < 0) ? 1 : columns; } /* --------------------------------------------------------------------------*/ @@ -447,73 +492,74 @@ int BLI_str_utf8_char_width_safe(const char *p) /* note, glib uses uint for unicode, best we do the same, * though we don't typedef it - campbell */ -#define UTF8_COMPUTE(Char, Mask, Len, Err) \ - if (Char < 128) { \ - Len = 1; \ - Mask = 0x7f; \ - } \ - else if ((Char & 0xe0) == 0xc0) { \ - Len = 2; \ - Mask = 0x1f; \ - } \ - else if ((Char & 0xf0) == 0xe0) { \ - Len = 3; \ - Mask = 0x0f; \ - } \ - else if ((Char & 0xf8) == 0xf0) { \ - Len = 4; \ - Mask = 0x07; \ - } \ - else if ((Char & 0xfc) == 0xf8) { \ - Len = 5; \ - Mask = 0x03; \ - } \ - else if ((Char & 0xfe) == 0xfc) { \ - Len = 6; \ - Mask = 0x01; \ - } \ - else { \ - Len = Err; /* -1 is the typical error value or 1 to skip */ \ - } (void)0 +#define UTF8_COMPUTE(Char, Mask, Len, Err) \ + if (Char < 128) { \ + Len = 1; \ + Mask = 0x7f; \ + } \ + else if ((Char & 0xe0) == 0xc0) { \ + Len = 2; \ + Mask = 0x1f; \ + } \ + else if ((Char & 0xf0) == 0xe0) { \ + Len = 3; \ + Mask = 0x0f; \ + } \ + else if ((Char & 0xf8) == 0xf0) { \ + Len = 4; \ + Mask = 0x07; \ + } \ + else if ((Char & 0xfc) == 0xf8) { \ + Len = 5; \ + Mask = 0x03; \ + } \ + else if ((Char & 0xfe) == 0xfc) { \ + Len = 6; \ + Mask = 0x01; \ + } \ + else { \ + Len = Err; /* -1 is the typical error value or 1 to skip */ \ + } \ + (void)0 /* same as glib define but added an 'Err' arg */ -#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \ - (Result) = (Chars)[0] & (Mask); \ - for ((Count) = 1; (Count) < (Len); ++(Count)) { \ - if (((Chars)[(Count)] & 0xc0) != 0x80) { \ - (Result) = Err; \ - break; \ - } \ - (Result) <<= 6; \ - (Result) |= ((Chars)[(Count)] & 0x3f); \ - } (void)0 - +#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \ + (Result) = (Chars)[0] & (Mask); \ + for ((Count) = 1; (Count) < (Len); ++(Count)) { \ + if (((Chars)[(Count)] & 0xc0) != 0x80) { \ + (Result) = Err; \ + break; \ + } \ + (Result) <<= 6; \ + (Result) |= ((Chars)[(Count)] & 0x3f); \ + } \ + (void)0 /* uses glib functions but not from glib */ /* gets the size of a single utf8 char */ int BLI_str_utf8_size(const char *p) { - int mask = 0, len; - const unsigned char c = (unsigned char) *p; + int mask = 0, len; + const unsigned char c = (unsigned char)*p; - UTF8_COMPUTE(c, mask, len, -1); + UTF8_COMPUTE(c, mask, len, -1); - (void)mask; /* quiet warning */ + (void)mask; /* quiet warning */ - return len; + return len; } /* use when we want to skip errors */ int BLI_str_utf8_size_safe(const char *p) { - int mask = 0, len; - const unsigned char c = (unsigned char) *p; + int mask = 0, len; + const unsigned char c = (unsigned char)*p; - UTF8_COMPUTE(c, mask, len, 1); + UTF8_COMPUTE(c, mask, len, 1); - (void)mask; /* quiet warning */ + (void)mask; /* quiet warning */ - return len; + return len; } /* was g_utf8_get_char */ @@ -531,98 +577,98 @@ int BLI_str_utf8_size_safe(const char *p) */ uint BLI_str_utf8_as_unicode(const char *p) { - int i, len; - uint mask = 0; - uint result; - const unsigned char c = (unsigned char) *p; - - UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) { - return BLI_UTF8_ERR; - } - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); - - return result; + int i, len; + uint mask = 0; + uint result; + const unsigned char c = (unsigned char)*p; + + UTF8_COMPUTE(c, mask, len, -1); + if (UNLIKELY(len == -1)) { + return BLI_UTF8_ERR; + } + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + + return result; } /* variant that increments the length */ uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restrict index) { - int i, len; - unsigned mask = 0; - uint result; - const unsigned char c = (unsigned char) *p; - - UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) { - return BLI_UTF8_ERR; - } - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); - *index += (size_t)len; - return result; + int i, len; + unsigned mask = 0; + uint result; + const unsigned char c = (unsigned char)*p; + + UTF8_COMPUTE(c, mask, len, -1); + if (UNLIKELY(len == -1)) { + return BLI_UTF8_ERR; + } + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + *index += (size_t)len; + return result; } uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index) { - int i, len; - uint mask = 0; - uint result; - const unsigned char c = (unsigned char) *p; - - UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) { - *index += 1; - return c; - } - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); - *index += (size_t)len; - return result; + int i, len; + uint mask = 0; + uint result; + const unsigned char c = (unsigned char)*p; + + UTF8_COMPUTE(c, mask, len, -1); + if (UNLIKELY(len == -1)) { + *index += 1; + return c; + } + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + *index += (size_t)len; + return result; } /* another variant that steps over the index, * note, currently this also falls back to latin1 for text drawing. */ uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index) { - int i, len; - uint mask = 0; - uint result; - unsigned char c; - - p += *index; - c = (unsigned char) *p; - - UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) { - /* when called with NULL end, result will never be NULL, - * checks for a NULL character */ - const char *p_next = BLI_str_find_next_char_utf8(p, NULL); - /* will never return the same pointer unless '\0', - * eternal loop is prevented */ - *index += (size_t)(p_next - p); - return BLI_UTF8_ERR; - } - - /* this is tricky since there are a few ways we can bail out of bad unicode - * values, 3 possible solutions. */ + int i, len; + uint mask = 0; + uint result; + unsigned char c; + + p += *index; + c = (unsigned char)*p; + + UTF8_COMPUTE(c, mask, len, -1); + if (UNLIKELY(len == -1)) { + /* when called with NULL end, result will never be NULL, + * checks for a NULL character */ + const char *p_next = BLI_str_find_next_char_utf8(p, NULL); + /* will never return the same pointer unless '\0', + * eternal loop is prevented */ + *index += (size_t)(p_next - p); + return BLI_UTF8_ERR; + } + + /* this is tricky since there are a few ways we can bail out of bad unicode + * values, 3 possible solutions. */ #if 0 - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); #elif 1 - /* WARNING: this is NOT part of glib, or supported by similar functions. - * this is added for text drawing because some filepaths can have latin1 - * characters */ - UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); - if (result == BLI_UTF8_ERR) { - len = 1; - result = *p; - } - /* end warning! */ + /* WARNING: this is NOT part of glib, or supported by similar functions. + * this is added for text drawing because some filepaths can have latin1 + * characters */ + UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); + if (result == BLI_UTF8_ERR) { + len = 1; + result = *p; + } + /* end warning! */ #else - /* without a fallback like '?', text drawing will stop on this value */ - UTF8_GET(result, p, i, mask, len, '?'); + /* without a fallback like '?', text drawing will stop on this value */ + UTF8_GET(result, p, i, mask, len, '?'); #endif - *index += (size_t)len; - return result; + *index += (size_t)len; + return result; } /* was g_unichar_to_utf8 */ @@ -639,45 +685,45 @@ uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict i */ size_t BLI_str_utf8_from_unicode(uint c, char *outbuf) { - /* If this gets modified, also update the copy in g_string_insert_unichar() */ - uint len = 0; - uint first; - uint i; - - if (c < 0x80) { - first = 0; - len = 1; - } - else if (c < 0x800) { - first = 0xc0; - len = 2; - } - else if (c < 0x10000) { - first = 0xe0; - len = 3; - } - else if (c < 0x200000) { - first = 0xf0; - len = 4; - } - else if (c < 0x4000000) { - first = 0xf8; - len = 5; - } - else { - first = 0xfc; - len = 6; - } - - if (outbuf) { - for (i = len - 1; i > 0; --i) { - outbuf[i] = (c & 0x3f) | 0x80; - c >>= 6; - } - outbuf[0] = c | first; - } - - return len; + /* If this gets modified, also update the copy in g_string_insert_unichar() */ + uint len = 0; + uint first; + uint i; + + if (c < 0x80) { + first = 0; + len = 1; + } + else if (c < 0x800) { + first = 0xc0; + len = 2; + } + else if (c < 0x10000) { + first = 0xe0; + len = 3; + } + else if (c < 0x200000) { + first = 0xf0; + len = 4; + } + else if (c < 0x4000000) { + first = 0xf8; + len = 5; + } + else { + first = 0xfc; + len = 6; + } + + if (outbuf) { + for (i = len - 1; i > 0; --i) { + outbuf[i] = (c & 0x3f) | 0x80; + c >>= 6; + } + outbuf[0] = c | first; + } + + return len; } /* was g_utf8_find_prev_char */ @@ -698,12 +744,12 @@ size_t BLI_str_utf8_from_unicode(uint c, char *outbuf) */ char *BLI_str_find_prev_char_utf8(const char *str, const char *p) { - for (--p; p >= str; --p) { - if ((*p & 0xc0) != 0x80) { - return (char *)p; - } - } - return NULL; + for (--p; p >= str; --p) { + if ((*p & 0xc0) != 0x80) { + return (char *)p; + } + } + return NULL; } /* was g_utf8_find_next_char */ @@ -723,19 +769,19 @@ char *BLI_str_find_prev_char_utf8(const char *str, const char *p) */ char *BLI_str_find_next_char_utf8(const char *p, const char *end) { - if (*p) { - if (end) { - for (++p; p < end && (*p & 0xc0) == 0x80; ++p) { - /* do nothing */ - } - } - else { - for (++p; (*p & 0xc0) == 0x80; ++p) { - /* do nothing */ - } - } - } - return (p == end) ? NULL : (char *)p; + if (*p) { + if (end) { + for (++p; p < end && (*p & 0xc0) == 0x80; ++p) { + /* do nothing */ + } + } + else { + for (++p; (*p & 0xc0) == 0x80; ++p) { + /* do nothing */ + } + } + } + return (p == end) ? NULL : (char *)p; } /* was g_utf8_prev_char */ @@ -754,61 +800,71 @@ char *BLI_str_find_next_char_utf8(const char *p, const char *end) */ char *BLI_str_prev_char_utf8(const char *p) { - while (1) { - p--; - if ((*p & 0xc0) != 0x80) { - return (char *)p; - } - } + while (1) { + p--; + if ((*p & 0xc0) != 0x80) { + return (char *)p; + } + } } /* end glib copy */ -size_t BLI_str_partition_utf8(const char *str, const uint delim[], const char **sep, const char **suf) +size_t BLI_str_partition_utf8(const char *str, + const uint delim[], + const char **sep, + const char **suf) { - return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false); + return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false); } -size_t BLI_str_rpartition_utf8(const char *str, const uint delim[], const char **sep, const char **suf) +size_t BLI_str_rpartition_utf8(const char *str, + const uint delim[], + const char **sep, + const char **suf) { - return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true); + return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true); } -size_t BLI_str_partition_ex_utf8( - const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right) +size_t BLI_str_partition_ex_utf8(const char *str, + const char *end, + const uint delim[], + const char **sep, + const char **suf, + const bool from_right) { - const uint *d; - const size_t str_len = end ? (size_t)(end - str) : strlen(str); - size_t index; - - /* Note that here, we assume end points to a valid utf8 char! */ - BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR))); - - *suf = (char *)(str + str_len); - - for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str), index = 0; - *sep >= str && (!end || *sep < end) && **sep != '\0'; - *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) - { - const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index); - - if (c == BLI_UTF8_ERR) { - *suf = *sep = NULL; - break; - } - - for (d = delim; *d != '\0'; ++d) { - if (*d == c) { - /* *suf is already correct in case from_right is true. */ - if (!from_right) { - *suf = (char *)(str + index); - } - return (size_t)(*sep - str); - } - } - - *suf = *sep; /* Useful in 'from_right' case! */ - } - - *suf = *sep = NULL; - return str_len; + const uint *d; + const size_t str_len = end ? (size_t)(end - str) : strlen(str); + size_t index; + + /* Note that here, we assume end points to a valid utf8 char! */ + BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR))); + + *suf = (char *)(str + str_len); + + for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str), + index = 0; + *sep >= str && (!end || *sep < end) && **sep != '\0'; + *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) { + const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index); + + if (c == BLI_UTF8_ERR) { + *suf = *sep = NULL; + break; + } + + for (d = delim; *d != '\0'; ++d) { + if (*d == c) { + /* *suf is already correct in case from_right is true. */ + if (!from_right) { + *suf = (char *)(str + index); + } + return (size_t)(*sep - str); + } + } + + *suf = *sep; /* Useful in 'from_right' case! */ + } + + *suf = *sep = NULL; + return str_len; } |