diff options
Diffstat (limited to 'source/blender/blenlib/intern/string_utf8.c')
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 69 |
1 files changed, 51 insertions, 18 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index 9697fcf09e9..96033615cf5 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ b/source/blender/blenlib/intern/string_utf8.c @@ -69,11 +69,11 @@ static const char trailingBytesForUTF8[256] = { int BLI_utf8_invalid_byte(const char *str, int length) { - const unsigned char *p, *pend = (unsigned char *)str + length; + const unsigned char *p, *pend = (const unsigned char *)str + length; unsigned char c; int ab; - for (p = (unsigned char *)str; p < pend; p++) { + for (p = (const unsigned char *)str; p < pend; p++) { c = *p; if (c < 128) continue; @@ -130,7 +130,7 @@ int BLI_utf8_invalid_byte(const char *str, int length) utf8_error: - return (int)((char *)p - (char *)str) - 1; + return (int)((const char *)p - (const char *)str) - 1; } int BLI_utf8_invalid_strip(char *str, int length) @@ -209,6 +209,22 @@ char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t return r_dst; } +size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy) +{ + char *r_dst = dst; + + BLI_assert(maxncpy != 0); + +#ifdef DEBUG_STRSIZE + memset(dst, 0xff, sizeof(*dst) * maxncpy); +#endif + + /* note: currently we don't attempt to deal with invalid utf8 chars */ + BLI_STR_UTF8_CPY(dst, src, maxncpy); + + return (size_t)(dst - r_dst); +} + char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy) { while (*dst && maxncpy > 0) { @@ -233,6 +249,7 @@ char *BLI_strncat_utf8(char *__restrict dst, const char *__restrict src, size_t size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy) { const size_t maxlen = maxncpy - 1; + const int64_t maxlen_secured = (int64_t)maxlen - 6; /* 6 is max utf8 length of an unicode char. */ size_t len = 0; BLI_assert(maxncpy != 0); @@ -241,10 +258,23 @@ size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict memset(dst, 0xff, sizeof(*dst) * maxncpy); #endif - while (*src && len < maxlen) { /* XXX can still run over the buffer because utf8 size isn't known :| */ + while (*src && len <= maxlen_secured) { len += BLI_str_utf8_from_unicode((unsigned int)*src++, dst + len); } + /* We have to be more careful for the last six bytes, to avoid buffer overflow in case utf8-encoded char + * would be too long for our dst buffer. */ + while (*src) { + char t[6]; + size_t l = BLI_str_utf8_from_unicode((unsigned int)*src++, t); + BLI_assert(l <= 6); + if (len + l > maxlen) { + break; + } + memcpy(dst + len, t, l); + len += l; + } + dst[len] = '\0'; return len; @@ -299,8 +329,8 @@ size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_ } /** - * \param start the string to measure the length. - * \param maxlen the string length (in bytes) + * \param strc: the string to measure the length. + * \param maxlen: the string length (in bytes) * \return the unicode length (not in bytes!) */ size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen) @@ -569,14 +599,14 @@ unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__re /* was g_unichar_to_utf8 */ /** * BLI_str_utf8_from_unicode: - * @c a Unicode character code - * \param outbuf output buffer, must have at least 6 bytes of space. + * \param c: a Unicode character code + * \param outbuf: output buffer, must have at least 6 bytes of space. * If %NULL, the length will be computed and returned * and nothing will be written to outbuf. * * Converts a single character to UTF-8. * - * Return value: number of bytes written + * \return number of bytes written **/ size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf) { @@ -704,28 +734,31 @@ char *BLI_str_prev_char_utf8(const char *p) } /* end glib copy */ -size_t BLI_str_partition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf) +size_t BLI_str_partition_utf8(const char *str, const unsigned int delim[], const char **sep, const char **suf) { - return BLI_str_partition_ex_utf8(str, delim, sep, suf, false); + return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false); } -size_t BLI_str_rpartition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf) +size_t BLI_str_rpartition_utf8(const char *str, const unsigned int delim[], const char **sep, const char **suf) { - return BLI_str_partition_ex_utf8(str, delim, sep, suf, true); + return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true); } -size_t BLI_str_partition_ex_utf8(const char *str, const unsigned int delim[], char **sep, char **suf, - const bool from_right) +size_t BLI_str_partition_ex_utf8( + const char *str, const char *end, const unsigned int delim[], const char **sep, const char **suf, const bool from_right) { const unsigned int *d; - const size_t str_len = strlen(str); + const size_t str_len = end ? (size_t)(end - str) : strlen(str); size_t index; + /* Note that here, we assume end points to a valid utf8 char! */ + BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR))); + *suf = (char *)(str + str_len); for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str), index = 0; - *sep != NULL && **sep != '\0'; - *sep = (char *)(from_right ? (char *)BLI_str_find_prev_char_utf8(str, *sep) : str + index)) + *sep >= str && (!end || *sep < end) && **sep != '\0'; + *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) { const unsigned int c = BLI_str_utf8_as_unicode_and_size(*sep, &index); |