diff options
Diffstat (limited to 'source/blender/blenlib/intern/string_utf8.c')
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 64 |
1 files changed, 38 insertions, 26 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index 12ada9b6fd9..0c07d1c361a 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ b/source/blender/blenlib/intern/string_utf8.c @@ -77,12 +77,16 @@ ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length) for (p = (const unsigned char *)str; p < pend; p++, length--) { c = *p; perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */ - if (ELEM(c, 0xfe, 0xff, 0x00)) /* Those three values are not allowed in utf8 string. */ + if (ELEM(c, 0xfe, 0xff, 0x00)) { + /* Those three values are not allowed in utf8 string. */ goto utf8_error; - if (c < 128) + } + if (c < 128) { continue; - if ((c & 0xc0) != 0xc0) + } + if ((c & 0xc0) != 0xc0) { goto utf8_error; + } /* Note that since we always increase p (and decrease length) by one byte in main loop, * we only add/subtract extra utf8 bytes in code below @@ -95,51 +99,52 @@ ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length) /* Check top bits in the second byte */ p++; length--; - if ((*p & 0xc0) != 0x80) + if ((*p & 0xc0) != 0x80) { goto utf8_error; + } /* Check for overlong sequences for each different length */ switch (ab) { case 1: /* Check for xx00 000x */ - if ((c & 0x3e) == 0) goto utf8_error; + if ((c & 0x3e) == 0) { goto utf8_error; } continue; /* We know there aren't any more bytes to check */ case 2: /* Check for 1110 0000, xx0x xxxx */ - if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error; + if (c == 0xe0 && (*p & 0x20) == 0) { goto utf8_error; } /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */ /* From section 5.1 (and 5.2) */ if (c == 0xed) { - if (*p == 0xa0 && *(p + 1) == 0x80) goto utf8_error; - if (*p == 0xad && *(p + 1) == 0xbf) goto utf8_error; - if (*p == 0xae && *(p + 1) == 0x80) goto utf8_error; - if (*p == 0xaf && *(p + 1) == 0xbf) goto utf8_error; - if (*p == 0xb0 && *(p + 1) == 0x80) goto utf8_error; - if (*p == 0xbe && *(p + 1) == 0x80) goto utf8_error; - if (*p == 0xbf && *(p + 1) == 0xbf) goto utf8_error; + if (*p == 0xa0 && *(p + 1) == 0x80) { goto utf8_error; } + if (*p == 0xad && *(p + 1) == 0xbf) { goto utf8_error; } + if (*p == 0xae && *(p + 1) == 0x80) { goto utf8_error; } + if (*p == 0xaf && *(p + 1) == 0xbf) { goto utf8_error; } + if (*p == 0xb0 && *(p + 1) == 0x80) { goto utf8_error; } + if (*p == 0xbe && *(p + 1) == 0x80) { goto utf8_error; } + if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; } } /* From section 5.3 */ if (c == 0xef) { - if (*p == 0xbf && *(p + 1) == 0xbe) goto utf8_error; - if (*p == 0xbf && *(p + 1) == 0xbf) goto utf8_error; + if (*p == 0xbf && *(p + 1) == 0xbe) { goto utf8_error; } + if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; } } break; case 3: /* Check for 1111 0000, xx00 xxxx */ - if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error; + if (c == 0xf0 && (*p & 0x30) == 0) { goto utf8_error; } break; case 4: /* Check for 1111 1000, xx00 0xxx */ - if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error; + if (c == 0xf8 && (*p & 0x38) == 0) { goto utf8_error; } break; case 5: /* Check for 1111 1100, xx00 00xx */ - if (c == 0xfc && (*p & 0x3c) == 0) goto utf8_error; + if (c == 0xfc && (*p & 0x3c) == 0) { goto utf8_error; } break; } @@ -147,7 +152,7 @@ ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length) while (--ab > 0) { p++; length--; - if ((*p & 0xc0) != 0x80) goto utf8_error; + if ((*p & 0xc0) != 0x80) { goto utf8_error; } } } @@ -315,8 +320,9 @@ size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes) size_t len; const char *strc_orig = strc; - for (len = 0; *strc; len++) + for (len = 0; *strc; len++) { strc += BLI_str_utf8_size_safe(strc); + } *r_len_bytes = (size_t)(strc - strc_orig); return len; @@ -326,8 +332,9 @@ size_t BLI_strlen_utf8(const char *strc) { size_t len; - for (len = 0; *strc; len++) + for (len = 0; *strc; len++) { strc += BLI_str_utf8_size_safe(strc); + } return len; } @@ -412,8 +419,9 @@ int BLI_wcswidth(const wchar_t *pwcs, size_t n) int BLI_str_utf8_char_width(const char *p) { uint unicode = BLI_str_utf8_as_unicode(p); - if (unicode == BLI_UTF8_ERR) + if (unicode == BLI_UTF8_ERR) { return -1; + } return BLI_wcwidth((wchar_t)unicode); } @@ -423,8 +431,9 @@ int BLI_str_utf8_char_width_safe(const char *p) int columns; uint unicode = BLI_str_utf8_as_unicode(p); - if (unicode == BLI_UTF8_ERR) + if (unicode == BLI_UTF8_ERR) { return 1; + } columns = BLI_wcwidth((wchar_t)unicode); @@ -528,8 +537,9 @@ uint BLI_str_utf8_as_unicode(const char *p) const unsigned char c = (unsigned char) *p; UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) + if (UNLIKELY(len == -1)) { return BLI_UTF8_ERR; + } UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); return result; @@ -544,8 +554,9 @@ uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restri const unsigned char c = (unsigned char) *p; UTF8_COMPUTE(c, mask, len, -1); - if (UNLIKELY(len == -1)) + if (UNLIKELY(len == -1)) { return BLI_UTF8_ERR; + } UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR); *index += (size_t)len; return result; @@ -788,8 +799,9 @@ size_t BLI_str_partition_ex_utf8( for (d = delim; *d != '\0'; ++d) { if (*d == c) { /* *suf is already correct in case from_right is true. */ - if (!from_right) + if (!from_right) { *suf = (char *)(str + index); + } return (size_t)(*sep - str); } } |