Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/blender/blenlib/intern/string_utf8.c')
-rw-r--r--source/blender/blenlib/intern/string_utf8.c1010
1 files changed, 533 insertions, 477 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index 0c07d1c361a..01412416854 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -32,7 +32,7 @@
#include "BLI_utildefines.h"
-#include "BLI_string_utf8.h" /* own include */
+#include "BLI_string_utf8.h" /* own include */
#ifdef __GNUC__
# pragma GCC diagnostic error "-Wsign-conversion"
@@ -44,14 +44,14 @@
/* Note: last two values (0xfe and 0xff) are forbidden in utf-8,
* so they are considered 1 byte length too. */
static const size_t utf8_skip_data[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
};
/* from libswish3, originally called u8_isvalid(),
@@ -70,97 +70,127 @@ static const size_t utf8_skip_data[256] = {
*/
ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length)
{
- const unsigned char *p, *perr, *pend = (const unsigned char *)str + length;
- unsigned char c;
- int ab;
-
- for (p = (const unsigned char *)str; p < pend; p++, length--) {
- c = *p;
- perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */
- if (ELEM(c, 0xfe, 0xff, 0x00)) {
- /* Those three values are not allowed in utf8 string. */
- goto utf8_error;
- }
- if (c < 128) {
- continue;
- }
- if ((c & 0xc0) != 0xc0) {
- goto utf8_error;
- }
-
- /* Note that since we always increase p (and decrease length) by one byte in main loop,
- * we only add/subtract extra utf8 bytes in code below
- * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */
- ab = (int)utf8_skip_data[c] - 1;
- if (length <= ab) {
- goto utf8_error;
- }
-
- /* Check top bits in the second byte */
- p++;
- length--;
- if ((*p & 0xc0) != 0x80) {
- goto utf8_error;
- }
-
- /* Check for overlong sequences for each different length */
- switch (ab) {
- case 1:
- /* Check for xx00 000x */
- if ((c & 0x3e) == 0) { goto utf8_error; }
- continue; /* We know there aren't any more bytes to check */
-
- case 2:
- /* Check for 1110 0000, xx0x xxxx */
- if (c == 0xe0 && (*p & 0x20) == 0) { goto utf8_error; }
- /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn
- * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */
- /* From section 5.1 (and 5.2) */
- if (c == 0xed) {
- if (*p == 0xa0 && *(p + 1) == 0x80) { goto utf8_error; }
- if (*p == 0xad && *(p + 1) == 0xbf) { goto utf8_error; }
- if (*p == 0xae && *(p + 1) == 0x80) { goto utf8_error; }
- if (*p == 0xaf && *(p + 1) == 0xbf) { goto utf8_error; }
- if (*p == 0xb0 && *(p + 1) == 0x80) { goto utf8_error; }
- if (*p == 0xbe && *(p + 1) == 0x80) { goto utf8_error; }
- if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; }
- }
- /* From section 5.3 */
- if (c == 0xef) {
- if (*p == 0xbf && *(p + 1) == 0xbe) { goto utf8_error; }
- if (*p == 0xbf && *(p + 1) == 0xbf) { goto utf8_error; }
- }
- break;
-
- case 3:
- /* Check for 1111 0000, xx00 xxxx */
- if (c == 0xf0 && (*p & 0x30) == 0) { goto utf8_error; }
- break;
-
- case 4:
- /* Check for 1111 1000, xx00 0xxx */
- if (c == 0xf8 && (*p & 0x38) == 0) { goto utf8_error; }
- break;
-
- case 5:
- /* Check for 1111 1100, xx00 00xx */
- if (c == 0xfc && (*p & 0x3c) == 0) { goto utf8_error; }
- break;
- }
-
- /* Check for valid bytes after the 2nd, if any; all must start 10 */
- while (--ab > 0) {
- p++;
- length--;
- if ((*p & 0xc0) != 0x80) { goto utf8_error; }
- }
- }
-
- return -1;
+ const unsigned char *p, *perr, *pend = (const unsigned char *)str + length;
+ unsigned char c;
+ int ab;
+
+ for (p = (const unsigned char *)str; p < pend; p++, length--) {
+ c = *p;
+ perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */
+ if (ELEM(c, 0xfe, 0xff, 0x00)) {
+ /* Those three values are not allowed in utf8 string. */
+ goto utf8_error;
+ }
+ if (c < 128) {
+ continue;
+ }
+ if ((c & 0xc0) != 0xc0) {
+ goto utf8_error;
+ }
+
+ /* Note that since we always increase p (and decrease length) by one byte in main loop,
+ * we only add/subtract extra utf8 bytes in code below
+ * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */
+ ab = (int)utf8_skip_data[c] - 1;
+ if (length <= ab) {
+ goto utf8_error;
+ }
+
+ /* Check top bits in the second byte */
+ p++;
+ length--;
+ if ((*p & 0xc0) != 0x80) {
+ goto utf8_error;
+ }
+
+ /* Check for overlong sequences for each different length */
+ switch (ab) {
+ case 1:
+ /* Check for xx00 000x */
+ if ((c & 0x3e) == 0) {
+ goto utf8_error;
+ }
+ continue; /* We know there aren't any more bytes to check */
+
+ case 2:
+ /* Check for 1110 0000, xx0x xxxx */
+ if (c == 0xe0 && (*p & 0x20) == 0) {
+ goto utf8_error;
+ }
+ /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn
+ * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */
+ /* From section 5.1 (and 5.2) */
+ if (c == 0xed) {
+ if (*p == 0xa0 && *(p + 1) == 0x80) {
+ goto utf8_error;
+ }
+ if (*p == 0xad && *(p + 1) == 0xbf) {
+ goto utf8_error;
+ }
+ if (*p == 0xae && *(p + 1) == 0x80) {
+ goto utf8_error;
+ }
+ if (*p == 0xaf && *(p + 1) == 0xbf) {
+ goto utf8_error;
+ }
+ if (*p == 0xb0 && *(p + 1) == 0x80) {
+ goto utf8_error;
+ }
+ if (*p == 0xbe && *(p + 1) == 0x80) {
+ goto utf8_error;
+ }
+ if (*p == 0xbf && *(p + 1) == 0xbf) {
+ goto utf8_error;
+ }
+ }
+ /* From section 5.3 */
+ if (c == 0xef) {
+ if (*p == 0xbf && *(p + 1) == 0xbe) {
+ goto utf8_error;
+ }
+ if (*p == 0xbf && *(p + 1) == 0xbf) {
+ goto utf8_error;
+ }
+ }
+ break;
+
+ case 3:
+ /* Check for 1111 0000, xx00 xxxx */
+ if (c == 0xf0 && (*p & 0x30) == 0) {
+ goto utf8_error;
+ }
+ break;
+
+ case 4:
+ /* Check for 1111 1000, xx00 0xxx */
+ if (c == 0xf8 && (*p & 0x38) == 0) {
+ goto utf8_error;
+ }
+ break;
+
+ case 5:
+ /* Check for 1111 1100, xx00 00xx */
+ if (c == 0xfc && (*p & 0x3c) == 0) {
+ goto utf8_error;
+ }
+ break;
+ }
+
+ /* Check for valid bytes after the 2nd, if any; all must start 10 */
+ while (--ab > 0) {
+ p++;
+ length--;
+ if ((*p & 0xc0) != 0x80) {
+ goto utf8_error;
+ }
+ }
+ }
+
+ return -1;
utf8_error:
- return ((const char *)perr - (const char *)str);
+ return ((const char *)perr - (const char *)str);
}
/**
@@ -170,97 +200,108 @@ utf8_error:
*/
int BLI_utf8_invalid_strip(char *str, size_t length)
{
- ptrdiff_t bad_char;
- int tot = 0;
-
- BLI_assert(str[length] == '\0');
-
- while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) {
- str += bad_char;
- length -= (size_t)(bad_char + 1);
-
- if (length == 0) {
- /* last character bad, strip it */
- *str = '\0';
- tot++;
- break;
- }
- else {
- /* strip, keep looking */
- memmove(str, str + 1, length + 1); /* +1 for NULL char! */
- tot++;
- }
- }
-
- return tot;
+ ptrdiff_t bad_char;
+ int tot = 0;
+
+ BLI_assert(str[length] == '\0');
+
+ while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) {
+ str += bad_char;
+ length -= (size_t)(bad_char + 1);
+
+ if (length == 0) {
+ /* last character bad, strip it */
+ *str = '\0';
+ tot++;
+ break;
+ }
+ else {
+ /* strip, keep looking */
+ memmove(str, str + 1, length + 1); /* +1 for NULL char! */
+ tot++;
+ }
+ }
+
+ return tot;
}
-
/* compatible with BLI_strncpy, but esnure no partial utf8 chars */
-#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \
- { \
- size_t utf8_size; \
- while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) {\
- maxncpy -= utf8_size; \
- switch (utf8_size) { \
- case 6: *dst ++ = *src ++; ATTR_FALLTHROUGH; \
- case 5: *dst ++ = *src ++; ATTR_FALLTHROUGH; \
- case 4: *dst ++ = *src ++; ATTR_FALLTHROUGH; \
- case 3: *dst ++ = *src ++; ATTR_FALLTHROUGH; \
- case 2: *dst ++ = *src ++; ATTR_FALLTHROUGH; \
- case 1: *dst ++ = *src ++; \
- } \
- } \
- *dst = '\0'; \
- } (void)0
+#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \
+ { \
+ size_t utf8_size; \
+ while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) { \
+ maxncpy -= utf8_size; \
+ switch (utf8_size) { \
+ case 6: \
+ *dst++ = *src++; \
+ ATTR_FALLTHROUGH; \
+ case 5: \
+ *dst++ = *src++; \
+ ATTR_FALLTHROUGH; \
+ case 4: \
+ *dst++ = *src++; \
+ ATTR_FALLTHROUGH; \
+ case 3: \
+ *dst++ = *src++; \
+ ATTR_FALLTHROUGH; \
+ case 2: \
+ *dst++ = *src++; \
+ ATTR_FALLTHROUGH; \
+ case 1: \
+ *dst++ = *src++; \
+ } \
+ } \
+ *dst = '\0'; \
+ } \
+ (void)0
char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
- char *r_dst = dst;
+ char *r_dst = dst;
- BLI_assert(maxncpy != 0);
+ BLI_assert(maxncpy != 0);
#ifdef DEBUG_STRSIZE
- memset(dst, 0xff, sizeof(*dst) * maxncpy);
+ memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif
- /* note: currently we don't attempt to deal with invalid utf8 chars */
- BLI_STR_UTF8_CPY(dst, src, maxncpy);
+ /* note: currently we don't attempt to deal with invalid utf8 chars */
+ BLI_STR_UTF8_CPY(dst, src, maxncpy);
- return r_dst;
+ return r_dst;
}
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
- char *r_dst = dst;
+ char *r_dst = dst;
- BLI_assert(maxncpy != 0);
+ BLI_assert(maxncpy != 0);
#ifdef DEBUG_STRSIZE
- memset(dst, 0xff, sizeof(*dst) * maxncpy);
+ memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif
- /* note: currently we don't attempt to deal with invalid utf8 chars */
- BLI_STR_UTF8_CPY(dst, src, maxncpy);
+ /* note: currently we don't attempt to deal with invalid utf8 chars */
+ BLI_STR_UTF8_CPY(dst, src, maxncpy);
- return (size_t)(dst - r_dst);
+ return (size_t)(dst - r_dst);
}
char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
- while (*dst && maxncpy > 0) {
- dst++;
- maxncpy--;
- }
+ while (*dst && maxncpy > 0) {
+ dst++;
+ maxncpy--;
+ }
#ifdef DEBUG_STRSIZE
- memset(dst, 0xff, sizeof(*dst) * maxncpy);
+ memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif
- BLI_STR_UTF8_CPY(dst, src, maxncpy);
+ BLI_STR_UTF8_CPY(dst, src, maxncpy);
- return dst;
+ return dst;
}
#undef BLI_STR_UTF8_CPY
@@ -268,89 +309,91 @@ char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t
/* --------------------------------------------------------------------------*/
/* wchar_t / utf8 functions */
-size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy)
+size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst,
+ const wchar_t *__restrict src,
+ const size_t maxncpy)
{
- const size_t maxlen = maxncpy - 1;
- /* 6 is max utf8 length of an unicode char. */
- const int64_t maxlen_secured = (int64_t)maxlen - 6;
- size_t len = 0;
+ const size_t maxlen = maxncpy - 1;
+ /* 6 is max utf8 length of an unicode char. */
+ const int64_t maxlen_secured = (int64_t)maxlen - 6;
+ size_t len = 0;
- BLI_assert(maxncpy != 0);
+ BLI_assert(maxncpy != 0);
#ifdef DEBUG_STRSIZE
- memset(dst, 0xff, sizeof(*dst) * maxncpy);
+ memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif
- while (*src && len <= maxlen_secured) {
- len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
- }
-
- /* We have to be more careful for the last six bytes,
- * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
- while (*src) {
- char t[6];
- size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
- BLI_assert(l <= 6);
- if (len + l > maxlen) {
- break;
- }
- memcpy(dst + len, t, l);
- len += l;
- }
-
- dst[len] = '\0';
-
- return len;
+ while (*src && len <= maxlen_secured) {
+ len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
+ }
+
+ /* We have to be more careful for the last six bytes,
+ * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
+ while (*src) {
+ char t[6];
+ size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
+ BLI_assert(l <= 6);
+ if (len + l > maxlen) {
+ break;
+ }
+ memcpy(dst + len, t, l);
+ len += l;
+ }
+
+ dst[len] = '\0';
+
+ return len;
}
/* wchar len in utf8 */
size_t BLI_wstrlen_utf8(const wchar_t *src)
{
- size_t len = 0;
+ size_t len = 0;
- while (*src) {
- len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
- }
+ while (*src) {
+ len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
+ }
- return len;
+ return len;
}
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
{
- size_t len;
- const char *strc_orig = strc;
+ size_t len;
+ const char *strc_orig = strc;
- for (len = 0; *strc; len++) {
- strc += BLI_str_utf8_size_safe(strc);
- }
+ for (len = 0; *strc; len++) {
+ strc += BLI_str_utf8_size_safe(strc);
+ }
- *r_len_bytes = (size_t)(strc - strc_orig);
- return len;
+ *r_len_bytes = (size_t)(strc - strc_orig);
+ return len;
}
size_t BLI_strlen_utf8(const char *strc)
{
- size_t len;
+ size_t len;
- for (len = 0; *strc; len++) {
- strc += BLI_str_utf8_size_safe(strc);
- }
+ for (len = 0; *strc; len++) {
+ strc += BLI_str_utf8_size_safe(strc);
+ }
- return len;
+ return len;
}
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes)
{
- size_t len;
- const char *strc_orig = strc;
- const char *strc_end = strc + maxlen;
+ size_t len;
+ const char *strc_orig = strc;
+ const char *strc_end = strc + maxlen;
- for (len = 0; *strc && strc < strc_end; len++) {
- strc += BLI_str_utf8_size_safe(strc);
- }
+ for (len = 0; *strc && strc < strc_end; len++) {
+ strc += BLI_str_utf8_size_safe(strc);
+ }
- *r_len_bytes = (size_t)(strc - strc_orig);
- return len;
+ *r_len_bytes = (size_t)(strc - strc_orig);
+ return len;
}
/**
@@ -360,45 +403,47 @@ size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_
*/
size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen)
{
- size_t len;
- const char *strc_end = strc + maxlen;
+ size_t len;
+ const char *strc_end = strc + maxlen;
- for (len = 0; *strc && strc < strc_end; len++) {
- strc += BLI_str_utf8_size_safe(strc);
- }
+ for (len = 0; *strc && strc < strc_end; len++) {
+ strc += BLI_str_utf8_size_safe(strc);
+ }
- return len;
+ return len;
}
-size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
+size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
+ const char *__restrict src_c,
+ const size_t maxncpy)
{
- const size_t maxlen = maxncpy - 1;
- size_t len = 0;
+ const size_t maxlen = maxncpy - 1;
+ size_t len = 0;
- BLI_assert(maxncpy != 0);
+ BLI_assert(maxncpy != 0);
#ifdef DEBUG_STRSIZE
- memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
+ memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
#endif
- while (*src_c && len != maxlen) {
- size_t step = 0;
- uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
- if (unicode != BLI_UTF8_ERR) {
- *dst_w = (wchar_t)unicode;
- src_c += step;
- }
- else {
- *dst_w = '?';
- src_c = BLI_str_find_next_char_utf8(src_c, NULL);
- }
- dst_w++;
- len++;
- }
-
- *dst_w = 0;
-
- return len;
+ while (*src_c && len != maxlen) {
+ size_t step = 0;
+ uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
+ if (unicode != BLI_UTF8_ERR) {
+ *dst_w = (wchar_t)unicode;
+ src_c += step;
+ }
+ else {
+ *dst_w = '?';
+ src_c = BLI_str_find_next_char_utf8(src_c, NULL);
+ }
+ dst_w++;
+ len++;
+ }
+
+ *dst_w = 0;
+
+ return len;
}
/* end wchar_t / utf8 functions */
@@ -408,36 +453,36 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__rest
int BLI_wcwidth(wchar_t ucs)
{
- return mk_wcwidth(ucs);
+ return mk_wcwidth(ucs);
}
int BLI_wcswidth(const wchar_t *pwcs, size_t n)
{
- return mk_wcswidth(pwcs, n);
+ return mk_wcswidth(pwcs, n);
}
int BLI_str_utf8_char_width(const char *p)
{
- uint unicode = BLI_str_utf8_as_unicode(p);
- if (unicode == BLI_UTF8_ERR) {
- return -1;
- }
+ uint unicode = BLI_str_utf8_as_unicode(p);
+ if (unicode == BLI_UTF8_ERR) {
+ return -1;
+ }
- return BLI_wcwidth((wchar_t)unicode);
+ return BLI_wcwidth((wchar_t)unicode);
}
int BLI_str_utf8_char_width_safe(const char *p)
{
- int columns;
+ int columns;
- uint unicode = BLI_str_utf8_as_unicode(p);
- if (unicode == BLI_UTF8_ERR) {
- return 1;
- }
+ uint unicode = BLI_str_utf8_as_unicode(p);
+ if (unicode == BLI_UTF8_ERR) {
+ return 1;
+ }
- columns = BLI_wcwidth((wchar_t)unicode);
+ columns = BLI_wcwidth((wchar_t)unicode);
- return (columns < 0) ? 1 : columns;
+ return (columns < 0) ? 1 : columns;
}
/* --------------------------------------------------------------------------*/
@@ -447,73 +492,74 @@ int BLI_str_utf8_char_width_safe(const char *p)
/* note, glib uses uint for unicode, best we do the same,
* though we don't typedef it - campbell */
-#define UTF8_COMPUTE(Char, Mask, Len, Err) \
- if (Char < 128) { \
- Len = 1; \
- Mask = 0x7f; \
- } \
- else if ((Char & 0xe0) == 0xc0) { \
- Len = 2; \
- Mask = 0x1f; \
- } \
- else if ((Char & 0xf0) == 0xe0) { \
- Len = 3; \
- Mask = 0x0f; \
- } \
- else if ((Char & 0xf8) == 0xf0) { \
- Len = 4; \
- Mask = 0x07; \
- } \
- else if ((Char & 0xfc) == 0xf8) { \
- Len = 5; \
- Mask = 0x03; \
- } \
- else if ((Char & 0xfe) == 0xfc) { \
- Len = 6; \
- Mask = 0x01; \
- } \
- else { \
- Len = Err; /* -1 is the typical error value or 1 to skip */ \
- } (void)0
+#define UTF8_COMPUTE(Char, Mask, Len, Err) \
+ if (Char < 128) { \
+ Len = 1; \
+ Mask = 0x7f; \
+ } \
+ else if ((Char & 0xe0) == 0xc0) { \
+ Len = 2; \
+ Mask = 0x1f; \
+ } \
+ else if ((Char & 0xf0) == 0xe0) { \
+ Len = 3; \
+ Mask = 0x0f; \
+ } \
+ else if ((Char & 0xf8) == 0xf0) { \
+ Len = 4; \
+ Mask = 0x07; \
+ } \
+ else if ((Char & 0xfc) == 0xf8) { \
+ Len = 5; \
+ Mask = 0x03; \
+ } \
+ else if ((Char & 0xfe) == 0xfc) { \
+ Len = 6; \
+ Mask = 0x01; \
+ } \
+ else { \
+ Len = Err; /* -1 is the typical error value or 1 to skip */ \
+ } \
+ (void)0
/* same as glib define but added an 'Err' arg */
-#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \
- (Result) = (Chars)[0] & (Mask); \
- for ((Count) = 1; (Count) < (Len); ++(Count)) { \
- if (((Chars)[(Count)] & 0xc0) != 0x80) { \
- (Result) = Err; \
- break; \
- } \
- (Result) <<= 6; \
- (Result) |= ((Chars)[(Count)] & 0x3f); \
- } (void)0
-
+#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \
+ (Result) = (Chars)[0] & (Mask); \
+ for ((Count) = 1; (Count) < (Len); ++(Count)) { \
+ if (((Chars)[(Count)] & 0xc0) != 0x80) { \
+ (Result) = Err; \
+ break; \
+ } \
+ (Result) <<= 6; \
+ (Result) |= ((Chars)[(Count)] & 0x3f); \
+ } \
+ (void)0
/* uses glib functions but not from glib */
/* gets the size of a single utf8 char */
int BLI_str_utf8_size(const char *p)
{
- int mask = 0, len;
- const unsigned char c = (unsigned char) *p;
+ int mask = 0, len;
+ const unsigned char c = (unsigned char)*p;
- UTF8_COMPUTE(c, mask, len, -1);
+ UTF8_COMPUTE(c, mask, len, -1);
- (void)mask; /* quiet warning */
+ (void)mask; /* quiet warning */
- return len;
+ return len;
}
/* use when we want to skip errors */
int BLI_str_utf8_size_safe(const char *p)
{
- int mask = 0, len;
- const unsigned char c = (unsigned char) *p;
+ int mask = 0, len;
+ const unsigned char c = (unsigned char)*p;
- UTF8_COMPUTE(c, mask, len, 1);
+ UTF8_COMPUTE(c, mask, len, 1);
- (void)mask; /* quiet warning */
+ (void)mask; /* quiet warning */
- return len;
+ return len;
}
/* was g_utf8_get_char */
@@ -531,98 +577,98 @@ int BLI_str_utf8_size_safe(const char *p)
*/
uint BLI_str_utf8_as_unicode(const char *p)
{
- int i, len;
- uint mask = 0;
- uint result;
- const unsigned char c = (unsigned char) *p;
-
- UTF8_COMPUTE(c, mask, len, -1);
- if (UNLIKELY(len == -1)) {
- return BLI_UTF8_ERR;
- }
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
-
- return result;
+ int i, len;
+ uint mask = 0;
+ uint result;
+ const unsigned char c = (unsigned char)*p;
+
+ UTF8_COMPUTE(c, mask, len, -1);
+ if (UNLIKELY(len == -1)) {
+ return BLI_UTF8_ERR;
+ }
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+
+ return result;
}
/* variant that increments the length */
uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restrict index)
{
- int i, len;
- unsigned mask = 0;
- uint result;
- const unsigned char c = (unsigned char) *p;
-
- UTF8_COMPUTE(c, mask, len, -1);
- if (UNLIKELY(len == -1)) {
- return BLI_UTF8_ERR;
- }
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
- *index += (size_t)len;
- return result;
+ int i, len;
+ unsigned mask = 0;
+ uint result;
+ const unsigned char c = (unsigned char)*p;
+
+ UTF8_COMPUTE(c, mask, len, -1);
+ if (UNLIKELY(len == -1)) {
+ return BLI_UTF8_ERR;
+ }
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ *index += (size_t)len;
+ return result;
}
uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index)
{
- int i, len;
- uint mask = 0;
- uint result;
- const unsigned char c = (unsigned char) *p;
-
- UTF8_COMPUTE(c, mask, len, -1);
- if (UNLIKELY(len == -1)) {
- *index += 1;
- return c;
- }
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
- *index += (size_t)len;
- return result;
+ int i, len;
+ uint mask = 0;
+ uint result;
+ const unsigned char c = (unsigned char)*p;
+
+ UTF8_COMPUTE(c, mask, len, -1);
+ if (UNLIKELY(len == -1)) {
+ *index += 1;
+ return c;
+ }
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ *index += (size_t)len;
+ return result;
}
/* another variant that steps over the index,
* note, currently this also falls back to latin1 for text drawing. */
uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
{
- int i, len;
- uint mask = 0;
- uint result;
- unsigned char c;
-
- p += *index;
- c = (unsigned char) *p;
-
- UTF8_COMPUTE(c, mask, len, -1);
- if (UNLIKELY(len == -1)) {
- /* when called with NULL end, result will never be NULL,
- * checks for a NULL character */
- const char *p_next = BLI_str_find_next_char_utf8(p, NULL);
- /* will never return the same pointer unless '\0',
- * eternal loop is prevented */
- *index += (size_t)(p_next - p);
- return BLI_UTF8_ERR;
- }
-
- /* this is tricky since there are a few ways we can bail out of bad unicode
- * values, 3 possible solutions. */
+ int i, len;
+ uint mask = 0;
+ uint result;
+ unsigned char c;
+
+ p += *index;
+ c = (unsigned char)*p;
+
+ UTF8_COMPUTE(c, mask, len, -1);
+ if (UNLIKELY(len == -1)) {
+ /* when called with NULL end, result will never be NULL,
+ * checks for a NULL character */
+ const char *p_next = BLI_str_find_next_char_utf8(p, NULL);
+ /* will never return the same pointer unless '\0',
+ * eternal loop is prevented */
+ *index += (size_t)(p_next - p);
+ return BLI_UTF8_ERR;
+ }
+
+ /* this is tricky since there are a few ways we can bail out of bad unicode
+ * values, 3 possible solutions. */
#if 0
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
#elif 1
- /* WARNING: this is NOT part of glib, or supported by similar functions.
- * this is added for text drawing because some filepaths can have latin1
- * characters */
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
- if (result == BLI_UTF8_ERR) {
- len = 1;
- result = *p;
- }
- /* end warning! */
+ /* WARNING: this is NOT part of glib, or supported by similar functions.
+ * this is added for text drawing because some filepaths can have latin1
+ * characters */
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ if (result == BLI_UTF8_ERR) {
+ len = 1;
+ result = *p;
+ }
+ /* end warning! */
#else
- /* without a fallback like '?', text drawing will stop on this value */
- UTF8_GET(result, p, i, mask, len, '?');
+ /* without a fallback like '?', text drawing will stop on this value */
+ UTF8_GET(result, p, i, mask, len, '?');
#endif
- *index += (size_t)len;
- return result;
+ *index += (size_t)len;
+ return result;
}
/* was g_unichar_to_utf8 */
@@ -639,45 +685,45 @@ uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict i
*/
size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
{
- /* If this gets modified, also update the copy in g_string_insert_unichar() */
- uint len = 0;
- uint first;
- uint i;
-
- if (c < 0x80) {
- first = 0;
- len = 1;
- }
- else if (c < 0x800) {
- first = 0xc0;
- len = 2;
- }
- else if (c < 0x10000) {
- first = 0xe0;
- len = 3;
- }
- else if (c < 0x200000) {
- first = 0xf0;
- len = 4;
- }
- else if (c < 0x4000000) {
- first = 0xf8;
- len = 5;
- }
- else {
- first = 0xfc;
- len = 6;
- }
-
- if (outbuf) {
- for (i = len - 1; i > 0; --i) {
- outbuf[i] = (c & 0x3f) | 0x80;
- c >>= 6;
- }
- outbuf[0] = c | first;
- }
-
- return len;
+ /* If this gets modified, also update the copy in g_string_insert_unichar() */
+ uint len = 0;
+ uint first;
+ uint i;
+
+ if (c < 0x80) {
+ first = 0;
+ len = 1;
+ }
+ else if (c < 0x800) {
+ first = 0xc0;
+ len = 2;
+ }
+ else if (c < 0x10000) {
+ first = 0xe0;
+ len = 3;
+ }
+ else if (c < 0x200000) {
+ first = 0xf0;
+ len = 4;
+ }
+ else if (c < 0x4000000) {
+ first = 0xf8;
+ len = 5;
+ }
+ else {
+ first = 0xfc;
+ len = 6;
+ }
+
+ if (outbuf) {
+ for (i = len - 1; i > 0; --i) {
+ outbuf[i] = (c & 0x3f) | 0x80;
+ c >>= 6;
+ }
+ outbuf[0] = c | first;
+ }
+
+ return len;
}
/* was g_utf8_find_prev_char */
@@ -698,12 +744,12 @@ size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
*/
char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
{
- for (--p; p >= str; --p) {
- if ((*p & 0xc0) != 0x80) {
- return (char *)p;
- }
- }
- return NULL;
+ for (--p; p >= str; --p) {
+ if ((*p & 0xc0) != 0x80) {
+ return (char *)p;
+ }
+ }
+ return NULL;
}
/* was g_utf8_find_next_char */
@@ -723,19 +769,19 @@ char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
*/
char *BLI_str_find_next_char_utf8(const char *p, const char *end)
{
- if (*p) {
- if (end) {
- for (++p; p < end && (*p & 0xc0) == 0x80; ++p) {
- /* do nothing */
- }
- }
- else {
- for (++p; (*p & 0xc0) == 0x80; ++p) {
- /* do nothing */
- }
- }
- }
- return (p == end) ? NULL : (char *)p;
+ if (*p) {
+ if (end) {
+ for (++p; p < end && (*p & 0xc0) == 0x80; ++p) {
+ /* do nothing */
+ }
+ }
+ else {
+ for (++p; (*p & 0xc0) == 0x80; ++p) {
+ /* do nothing */
+ }
+ }
+ }
+ return (p == end) ? NULL : (char *)p;
}
/* was g_utf8_prev_char */
@@ -754,61 +800,71 @@ char *BLI_str_find_next_char_utf8(const char *p, const char *end)
*/
char *BLI_str_prev_char_utf8(const char *p)
{
- while (1) {
- p--;
- if ((*p & 0xc0) != 0x80) {
- return (char *)p;
- }
- }
+ while (1) {
+ p--;
+ if ((*p & 0xc0) != 0x80) {
+ return (char *)p;
+ }
+ }
}
/* end glib copy */
-size_t BLI_str_partition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
+size_t BLI_str_partition_utf8(const char *str,
+ const uint delim[],
+ const char **sep,
+ const char **suf)
{
- return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false);
+ return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false);
}
-size_t BLI_str_rpartition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
+size_t BLI_str_rpartition_utf8(const char *str,
+ const uint delim[],
+ const char **sep,
+ const char **suf)
{
- return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true);
+ return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true);
}
-size_t BLI_str_partition_ex_utf8(
- const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right)
+size_t BLI_str_partition_ex_utf8(const char *str,
+ const char *end,
+ const uint delim[],
+ const char **sep,
+ const char **suf,
+ const bool from_right)
{
- const uint *d;
- const size_t str_len = end ? (size_t)(end - str) : strlen(str);
- size_t index;
-
- /* Note that here, we assume end points to a valid utf8 char! */
- BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR)));
-
- *suf = (char *)(str + str_len);
-
- for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str), index = 0;
- *sep >= str && (!end || *sep < end) && **sep != '\0';
- *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index))
- {
- const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index);
-
- if (c == BLI_UTF8_ERR) {
- *suf = *sep = NULL;
- break;
- }
-
- for (d = delim; *d != '\0'; ++d) {
- if (*d == c) {
- /* *suf is already correct in case from_right is true. */
- if (!from_right) {
- *suf = (char *)(str + index);
- }
- return (size_t)(*sep - str);
- }
- }
-
- *suf = *sep; /* Useful in 'from_right' case! */
- }
-
- *suf = *sep = NULL;
- return str_len;
+ const uint *d;
+ const size_t str_len = end ? (size_t)(end - str) : strlen(str);
+ size_t index;
+
+ /* Note that here, we assume end points to a valid utf8 char! */
+ BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR)));
+
+ *suf = (char *)(str + str_len);
+
+ for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str),
+ index = 0;
+ *sep >= str && (!end || *sep < end) && **sep != '\0';
+ *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) {
+ const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index);
+
+ if (c == BLI_UTF8_ERR) {
+ *suf = *sep = NULL;
+ break;
+ }
+
+ for (d = delim; *d != '\0'; ++d) {
+ if (*d == c) {
+ /* *suf is already correct in case from_right is true. */
+ if (!from_right) {
+ *suf = (char *)(str + index);
+ }
+ return (size_t)(*sep - str);
+ }
+ }
+
+ *suf = *sep; /* Useful in 'from_right' case! */
+ }
+
+ *suf = *sep = NULL;
+ return str_len;
}