Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormano-wii <germano.costa@ig.com.br>2019-11-22 18:26:54 +0300
committermano-wii <germano.costa@ig.com.br>2019-11-22 18:27:34 +0300
commit177dfc6384b926dd19e3b7e98a995ccb4da9167c (patch)
tree865abe9f707bee039a506bfc17cb4cff218bd6c6 /source/blender/blenlib
parent1304cee920c5f01fd9f0474ea782db61ac031403 (diff)
Fix T71273: Bad encoding of utf-8 for Text objects
`BLI_strncpy_wchar_from_utf8` internally assumes that `wchar_t` is 32 bits wide, which is not the case on Windows. The solution is to replace `wchar_t` with `char32_t`. Thanks to @robbott for compatibility on macOS. Differential Revision: https://developer.blender.org/D6198
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r--source/blender/blenlib/BLI_string_cursor_utf8.h2
-rw-r--r--source/blender/blenlib/BLI_string_utf8.h12
-rw-r--r--source/blender/blenlib/BLI_sys_types.h9
-rw-r--r--source/blender/blenlib/intern/string_cursor_utf8.c16
-rw-r--r--source/blender/blenlib/intern/string_utf8.c95
5 files changed, 119 insertions, 15 deletions
diff --git a/source/blender/blenlib/BLI_string_cursor_utf8.h b/source/blender/blenlib/BLI_string_cursor_utf8.h
index 2d0acabc9de..a54089ad8d6 100644
--- a/source/blender/blenlib/BLI_string_cursor_utf8.h
+++ b/source/blender/blenlib/BLI_string_cursor_utf8.h
@@ -45,7 +45,7 @@ void BLI_str_cursor_step_utf8(const char *str,
eStrCursorJumpType jump,
bool use_init_step);
-void BLI_str_cursor_step_wchar(const wchar_t *str,
+void BLI_str_cursor_step_utf32(const char32_t *str,
size_t maxlen,
int *pos,
eStrCursorJumpDirection direction,
diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h
index 0cdd6e94610..1db4cdfecd6 100644
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@@ -26,6 +26,7 @@ extern "C" {
#endif
#include "BLI_compiler_attrs.h"
+#include "BLI_sys_types.h"
char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
ATTR_NONNULL();
@@ -48,6 +49,13 @@ unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p,
unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
ATTR_NONNULL();
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf);
+size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
+ const char *__restrict src_c,
+ const size_t maxncpy) ATTR_NONNULL();
+size_t BLI_str_utf32_as_utf8(char *__restrict dst,
+ const char32_t *__restrict src,
+ const size_t maxncpy) ATTR_NONNULL();
+size_t BLI_str_utf32_as_utf8_len(const char32_t *src) ATTR_NONNULL();
char *BLI_str_find_prev_char_utf8(const char *str, const char *p) ATTR_NONNULL();
char *BLI_str_find_next_char_utf8(const char *p, const char *end) ATTR_NONNULL(1);
@@ -68,8 +76,8 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst,
const size_t maxcpy) ATTR_NONNULL();
/* count columns that character/string occupies, based on wcwidth.c */
-int BLI_wcwidth(wchar_t ucs);
-int BLI_wcswidth(const wchar_t *pwcs, size_t n) ATTR_NONNULL();
+int BLI_wcwidth(char32_t ucs);
+int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_NONNULL();
/* warning, can return -1 on bad chars */
int BLI_str_utf8_char_width(const char *p) ATTR_NONNULL();
int BLI_str_utf8_char_width_safe(const char *p) ATTR_NONNULL();
diff --git a/source/blender/blenlib/BLI_sys_types.h b/source/blender/blenlib/BLI_sys_types.h
index a82e6a562e0..354d27385a2 100644
--- a/source/blender/blenlib/BLI_sys_types.h
+++ b/source/blender/blenlib/BLI_sys_types.h
@@ -72,6 +72,15 @@ typedef uint64_t u_int64_t;
#include <stddef.h> /* size_t define */
#include <stdbool.h>
+#ifndef __cplusplus
+# if defined(__APPLE__)
+/* The <uchar.h> standard header is missing on macOS. */
+typedef unsigned int char32_t;
+# else
+# include <uchar.h>
+# endif
+#endif
+
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned long ulong;
diff --git a/source/blender/blenlib/intern/string_cursor_utf8.c b/source/blender/blenlib/intern/string_cursor_utf8.c
index f0113a7028a..ee4c11b1c04 100644
--- a/source/blender/blenlib/intern/string_cursor_utf8.c
+++ b/source/blender/blenlib/intern/string_cursor_utf8.c
@@ -211,12 +211,12 @@ void BLI_str_cursor_step_utf8(const char *str,
}
}
-/* wchar_t version of BLI_str_cursor_step_utf8 (keep in sync!)
+/* UTF32 version of BLI_str_cursor_step_utf8 (keep in sync!)
* less complex since it doesn't need to do multi-byte stepping.
*/
/* helper funcs so we can match BLI_str_cursor_step_utf8 */
-static bool wchar_t_step_next(const wchar_t *UNUSED(str), size_t maxlen, int *pos)
+static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos)
{
if ((*pos) >= (int)maxlen) {
return false;
@@ -225,7 +225,7 @@ static bool wchar_t_step_next(const wchar_t *UNUSED(str), size_t maxlen, int *po
return true;
}
-static bool wchar_t_step_prev(const wchar_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
+static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
{
if ((*pos) <= 0) {
return false;
@@ -234,7 +234,7 @@ static bool wchar_t_step_prev(const wchar_t *UNUSED(str), size_t UNUSED(maxlen),
return true;
}
-void BLI_str_cursor_step_wchar(const wchar_t *str,
+void BLI_str_cursor_step_utf32(const char32_t *str,
size_t maxlen,
int *pos,
eStrCursorJumpDirection direction,
@@ -245,7 +245,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str,
if (direction == STRCUR_DIR_NEXT) {
if (use_init_step) {
- wchar_t_step_next(str, maxlen, pos);
+ cursor_step_next_utf32(str, maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
@@ -259,7 +259,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str,
* look at function cursor_delim_type_unicode() for complete
* list of special character, ctr -> */
while ((*pos) < maxlen) {
- if (wchar_t_step_next(str, maxlen, pos)) {
+ if (cursor_step_next_utf32(str, maxlen, pos)) {
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
break;
@@ -273,7 +273,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str,
}
else if (direction == STRCUR_DIR_PREV) {
if (use_init_step) {
- wchar_t_step_prev(str, maxlen, pos);
+ cursor_step_prev_utf32(str, maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
@@ -288,7 +288,7 @@ void BLI_str_cursor_step_wchar(const wchar_t *str,
* list of special character, ctr -> */
while ((*pos) > 0) {
const int pos_prev = *pos;
- if (wchar_t_step_prev(str, maxlen, pos)) {
+ if (cursor_step_prev_utf32(str, maxlen, pos)) {
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
/* left only: compensate for index/change in direction */
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index 92c4ec73768..63657f33bba 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -430,6 +430,11 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
size_t step = 0;
uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
if (unicode != BLI_UTF8_ERR) {
+ /* TODO: `wchar_t` is an implementation-defined type and may be 16 or 32 bits wide,
+ * depending on the operating system.
+ * Ideally the code point would be encoded to match that width.
+ * For now, just assert that there is no conflicting use. */
+ BLI_assert(step <= sizeof(wchar_t));
*dst_w = (wchar_t)unicode;
src_c += step;
}
@@ -451,12 +456,12 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
/* count columns that character/string occupies, based on wcwidth.c */
-int BLI_wcwidth(wchar_t ucs)
+int BLI_wcwidth(char32_t ucs)
{
return mk_wcwidth(ucs);
}
-int BLI_wcswidth(const wchar_t *pwcs, size_t n)
+int BLI_wcswidth(const char32_t *pwcs, size_t n)
{
return mk_wcswidth(pwcs, n);
}
@@ -468,7 +473,7 @@ int BLI_str_utf8_char_width(const char *p)
return -1;
}
- return BLI_wcwidth((wchar_t)unicode);
+ return BLI_wcwidth((char32_t)unicode);
}
int BLI_str_utf8_char_width_safe(const char *p)
@@ -480,7 +485,7 @@ int BLI_str_utf8_char_width_safe(const char *p)
return 1;
}
- columns = BLI_wcwidth((wchar_t)unicode);
+ columns = BLI_wcwidth((char32_t)unicode);
return (columns < 0) ? 1 : columns;
}
@@ -726,6 +731,88 @@ size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
return len;
}
+/**
+ * Decode the null-terminated string `src_c` into `dst_w`, one #char32_t per decoded
+ * character, writing at most `maxncpy - 1` elements followed by a zero terminator.
+ *
+ * Sequences for which #BLI_str_utf8_as_unicode_and_size reports #BLI_UTF8_ERR
+ * are stored as '?' and skipped with #BLI_str_find_next_char_utf8.
+ *
+ * \param dst_w: Output buffer with room for `maxncpy` elements.
+ * \param src_c: Null-terminated input string.
+ * \param maxncpy: Capacity of `dst_w` in elements, terminator included; must not be zero.
+ * \return The number of elements written, not counting the terminator.
+ */
+size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
+                             const char *__restrict src_c,
+                             const size_t maxncpy)
+{
+  /* One slot is always kept back for the terminator. */
+  const size_t capacity = maxncpy - 1;
+  size_t count = 0;
+
+  BLI_assert(maxncpy != 0);
+
+#ifdef DEBUG_STRSIZE
+  memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
+#endif
+
+  for (; *src_c != '\0' && count != capacity; count++) {
+    size_t bytes = 0;
+    const uint code = BLI_str_utf8_as_unicode_and_size(src_c, &bytes);
+
+    if (code == BLI_UTF8_ERR) {
+      /* Undecodable input: emit a placeholder and move on to the next character start. */
+      dst_w[count] = '?';
+      src_c = BLI_str_find_next_char_utf8(src_c, NULL);
+      continue;
+    }
+
+    dst_w[count] = code;
+    src_c += bytes;
+  }
+
+  dst_w[count] = 0;
+
+  return count;
+}
+
+/**
+ * Encode the zero-terminated UTF-32 string `src` as UTF-8 into `dst`.
+ *
+ * Output stops at the first code point whose encoding would not fit in the
+ * remaining space; `dst` is always null-terminated.
+ *
+ * \param dst: Output buffer with room for `maxncpy` bytes.
+ * \param src: Zero-terminated UTF-32 input.
+ * \param maxncpy: Size of `dst` in bytes, terminator included; must not be zero.
+ * \return The number of bytes written to `dst`, not counting the terminating '\0'.
+ */
+size_t BLI_str_utf32_as_utf8(char *__restrict dst,
+                             const char32_t *__restrict src,
+                             const size_t maxncpy)
+{
+  /* 6 is max utf8 length of an unicode char. */
+  enum { UTF8_CHAR_MAX = 6 };
+  const size_t maxlen = maxncpy - 1;
+  size_t len = 0;
+
+  BLI_assert(maxncpy != 0);
+
+#ifdef DEBUG_STRSIZE
+  memset(dst, 0xff, sizeof(*dst) * maxncpy);
+#endif
+
+  /* Fast path: encode straight into `dst` while a maximum-length sequence still fits.
+   * The bound is checked in the unsigned domain on purpose: a signed `maxlen - 6`
+   * is converted to a huge `size_t` when it is negative (buffers shorter than
+   * 7 bytes on targets with a 64-bit `size_t`), which would let this loop run
+   * past the end of `dst`. `len <= maxlen` always holds here, so the subtraction
+   * cannot wrap. */
+  while (*src && (maxlen - len) >= UTF8_CHAR_MAX) {
+    len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
+  }
+
+  /* Fewer than 6 bytes remain: encode into a scratch buffer first and only copy
+   * the sequence when it fits, to avoid overflowing `dst`. */
+  while (*src) {
+    char t[UTF8_CHAR_MAX];
+    const size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
+    BLI_assert(l <= UTF8_CHAR_MAX);
+    if (len + l > maxlen) {
+      break;
+    }
+    memcpy(dst + len, t, l);
+    len += l;
+  }
+
+  dst[len] = '\0';
+
+  return len;
+}
+
+/**
+ * Length in bytes of the zero-terminated UTF-32 string `src` once converted to UTF-8:
+ * the sum of what #BLI_str_utf8_from_unicode returns for each element
+ * (called with a NULL output buffer). The terminator is not counted.
+ */
+size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
+{
+  size_t total = 0;
+
+  for (const char32_t *ch = src; *ch != 0; ch++) {
+    total += BLI_str_utf8_from_unicode((uint)*ch, NULL);
+  }
+
+  return total;
+}
+
/* was g_utf8_find_prev_char */
/**
* BLI_str_find_prev_char_utf8: