Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source/blender/blenfont/intern/blf_font.c24
-rw-r--r--source/blender/blenkernel/intern/text.c4
-rw-r--r--source/blender/blenlib/BLI_string_utf8.h6
-rw-r--r--source/blender/blenlib/intern/string_utf8.c73
-rw-r--r--source/blender/blenlib/tests/BLI_string_utf8_test.cc111
-rw-r--r--source/blender/editors/space_text/text_ops.c7
6 files changed, 179 insertions, 46 deletions
diff --git a/source/blender/blenfont/intern/blf_font.c b/source/blender/blenfont/intern/blf_font.c
index 50b4bb09b7b..5ad48aa08d4 100644
--- a/source/blender/blenfont/intern/blf_font.c
+++ b/source/blender/blenfont/intern/blf_font.c
@@ -298,7 +298,7 @@ static void blf_batch_draw_end(void)
*/
BLI_INLINE GlyphBLF *blf_utf8_next_fast(
- FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t *i_p, uint *r_c)
+ FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t str_len, size_t *i_p, uint *r_c)
{
GlyphBLF *g;
if ((*r_c = str[*i_p]) < GLYPH_ASCII_TABLE_SIZE) {
@@ -309,7 +309,7 @@ BLI_INLINE GlyphBLF *blf_utf8_next_fast(
}
(*i_p)++;
}
- else if ((*r_c = BLI_str_utf8_as_unicode_step(str, i_p)) != BLI_UTF8_ERR) {
+ else if ((*r_c = BLI_str_utf8_as_unicode_step(str, str_len, i_p)) != BLI_UTF8_ERR) {
g = blf_glyph_search(gc, *r_c);
if (UNLIKELY(g == NULL)) {
g = blf_glyph_add(font, gc, FT_Get_Char_Index(font->face, *r_c), *r_c);
@@ -382,7 +382,7 @@ static void blf_font_draw_ex(FontBLF *font,
blf_batch_draw_begin(font);
while ((i < str_len) && str[i]) {
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -478,7 +478,7 @@ int blf_font_draw_mono(FontBLF *font, const char *str, const size_t str_len, int
blf_batch_draw_begin(font);
while ((i < str_len) && str[i]) {
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -535,7 +535,7 @@ static void blf_font_draw_buffer_ex(FontBLF *font,
/* another buffer specific call for color conversion */
while ((i < str_len) && str[i]) {
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -703,7 +703,7 @@ size_t blf_font_width_to_strlen(
for (i_prev = i = 0, width_new = pen_x = 0, g_prev = NULL, c_prev = 0; (i < str_len) && str[i];
i_prev = i, width_new = pen_x, c_prev = c, g_prev = g) {
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (blf_font_width_to_strlen_glyph_process(font, c_prev, c, g_prev, g, &pen_x, width_i)) {
break;
@@ -737,7 +737,7 @@ size_t blf_font_width_to_rstrlen(
i_prev = (size_t)((s_prev != NULL) ? s_prev - str : 0);
i_tmp = i;
- g = blf_utf8_next_fast(font, gc, str, &i_tmp, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c);
for (width_new = pen_x = 0; (s != NULL);
i = i_prev, s = s_prev, c = c_prev, g = g_prev, g_prev = NULL, width_new = pen_x) {
s_prev = BLI_str_find_prev_char_utf8(str, s);
@@ -745,7 +745,7 @@ size_t blf_font_width_to_rstrlen(
if (s_prev != NULL) {
i_tmp = i_prev;
- g_prev = blf_utf8_next_fast(font, gc, str, &i_tmp, &c_prev);
+ g_prev = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c_prev);
BLI_assert(i_tmp == i);
}
@@ -788,7 +788,7 @@ static void blf_font_boundbox_ex(FontBLF *font,
box->ymax = -32000.0f;
while ((i < str_len) && str[i]) {
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -961,7 +961,7 @@ static void blf_font_boundbox_foreach_glyph_ex(FontBLF *font,
while ((i < str_len) && str[i]) {
i_curr = i;
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -1051,7 +1051,7 @@ static void blf_font_wrap_apply(FontBLF *font,
size_t i_curr = i;
bool do_draw = false;
- g = blf_utf8_next_fast(font, gc, str, &i, &c);
+ g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
if (UNLIKELY(c == BLI_UTF8_ERR)) {
break;
@@ -1202,7 +1202,7 @@ int blf_font_count_missing_chars(FontBLF *font,
if ((c = str[i]) < GLYPH_ASCII_TABLE_SIZE) {
i++;
}
- else if ((c = BLI_str_utf8_as_unicode_step(str, &i)) != BLI_UTF8_ERR) {
+ else if ((c = BLI_str_utf8_as_unicode_step(str, str_len, &i)) != BLI_UTF8_ERR) {
if (FT_Get_Char_Index((font)->face, c) == 0) {
missing++;
}
diff --git a/source/blender/blenkernel/intern/text.c b/source/blender/blenkernel/intern/text.c
index 06137f5d110..c2ab91251b6 100644
--- a/source/blender/blenkernel/intern/text.c
+++ b/source/blender/blenkernel/intern/text.c
@@ -1660,7 +1660,7 @@ void txt_insert_buf(Text *text, const char *in_buffer)
/* Read the first line (or as close as possible */
while (buffer[i] && buffer[i] != '\n') {
- txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &i));
+ txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &i));
}
if (buffer[i] == '\n') {
@@ -1682,7 +1682,7 @@ void txt_insert_buf(Text *text, const char *in_buffer)
}
else {
for (j = i - l; j < i && j < len;) {
- txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &j));
+ txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &j));
}
break;
}
diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h
index e1d7e2c58f7..b936e39731d 100644
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@@ -43,8 +43,10 @@ unsigned int BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *
ATTR_NONNULL();
unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p,
size_t *__restrict index) ATTR_NONNULL();
-unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
- ATTR_NONNULL();
+unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p,
+ size_t p_len,
+ size_t *__restrict index) ATTR_NONNULL(1, 3);
+
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf);
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
const char *__restrict src_c,
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index 5710bd6b150..dbde5221d7e 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -582,49 +582,69 @@ uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__r
/**
* Another variant that steps over the index.
+ *
+ * \param p: The text to step over.
+ * \param p_len: The length of `p`.
+ * \param index: Index of `p` to step over.
+ *
* \note currently this also falls back to latin1 for text drawing.
+ *
+ * \note The behavior for clipped text (where `p_len` limits decoding trailing bytes)
+ * must have the same behavior is encountering a nil byte,
+ * so functions that only use the first part of a string has matching behavior to functions
+ * that null terminate the text.
*/
-uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
+uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
+ const size_t p_len,
+ size_t *__restrict index)
{
int i, len;
uint mask = 0;
uint result;
- unsigned char c;
+ const char c = p[*index];
- p += *index;
- c = (unsigned char)*p;
+ BLI_assert(*index < p_len);
+ BLI_assert(c != '\0');
UTF8_COMPUTE(c, mask, len, -1);
if (UNLIKELY(len == -1)) {
- /* when called with NULL end, result will never be NULL,
- * checks for a NULL character */
- const char *p_next = BLI_str_find_next_char_utf8(p, NULL);
- /* will never return the same pointer unless '\0',
- * eternal loop is prevented */
- *index += (size_t)(p_next - p);
- return BLI_UTF8_ERR;
+ const char *p_next = BLI_str_find_next_char_utf8(p + *index, p + p_len);
+ /* #BLI_str_find_next_char_utf8 ensures the nil byte will terminate.
+ * so there is no chance this sets the index past the nil byte (assert this is the case). */
+ BLI_assert(p_next || (memchr(p + *index, '\0', p_len - *index) == NULL));
+ len = (int)((p_next ? (size_t)(p_next - p) : p_len) - *index);
+ result = BLI_UTF8_ERR;
+ }
+ else if (UNLIKELY(*index + (size_t)len > p_len)) {
+ /* A multi-byte character reads past the buffer bounds,
+ * match the behavior of encountering an byte with invalid encoding below. */
+ len = 1;
+ result = (uint)c;
}
-
- /* this is tricky since there are a few ways we can bail out of bad unicode
- * values, 3 possible solutions. */
+ else {
+ /* This is tricky since there are a few ways we can bail out of bad unicode
+ * values, 3 possible solutions. */
+ p += *index;
#if 0
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
#elif 1
- /* WARNING: this is NOT part of glib, or supported by similar functions.
- * this is added for text drawing because some filepaths can have latin1
- * characters */
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
- if (result == BLI_UTF8_ERR) {
- len = 1;
- result = *p;
- }
- /* end warning! */
+ /* WARNING: this is NOT part of glib, or supported by similar functions.
+ * this is added for text drawing because some filepaths can have latin1
+ * characters */
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ if (result == BLI_UTF8_ERR) {
+ len = 1;
+ result = (uint)c;
+ }
+ /* end warning! */
#else
- /* without a fallback like '?', text drawing will stop on this value */
- UTF8_GET(result, p, i, mask, len, '?');
+ /* Without a fallback like '?', text drawing will stop on this value. */
+ UTF8_GET(result, p, i, mask, len, '?');
#endif
+ }
*index += (size_t)len;
+ BLI_assert(*index <= p_len);
return result;
}
@@ -810,6 +830,7 @@ char *BLI_str_find_next_char_utf8(const char *p, const char *end)
{
if (*p) {
if (end) {
+ BLI_assert(end >= p);
for (++p; p < end && (*p & 0xc0) == 0x80; p++) {
/* do nothing */
}
diff --git a/source/blender/blenlib/tests/BLI_string_utf8_test.cc b/source/blender/blenlib/tests/BLI_string_utf8_test.cc
index 13f5cb7f284..b25f2310e1e 100644
--- a/source/blender/blenlib/tests/BLI_string_utf8_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_utf8_test.cc
@@ -2,6 +2,7 @@
#include "testing/testing.h"
+#include "BLI_rand.h"
#include "BLI_string.h"
#include "BLI_string_utf8.h"
#include "BLI_utildefines.h"
@@ -11,7 +12,8 @@
* quite their share of lines, they deserved their own file. */
/* -------------------------------------------------------------------- */
-/* tests */
+/** \name Test #BLI_str_utf8_invalid_strip
+ * \{ */
/* Breaking strings is confusing here, prefer over-long lines. */
/* clang-format off */
@@ -284,3 +286,110 @@ TEST(string, Utf8InvalidBytes)
EXPECT_STREQ(buff, tst_stripped);
}
}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Test #BLI_str_utf8_as_unicode_step
+ * \{ */
+
+static size_t utf8_as_char32(const char *str, const char str_len, char32_t *r_result)
+{
+ size_t i = 0, result_len = 0;
+ while ((i < str_len) && (str[i] != '\0')) {
+ char32_t c = BLI_str_utf8_as_unicode_step(str, str_len, &i);
+ if (c != BLI_UTF8_ERR) {
+ r_result[result_len++] = c;
+ }
+ }
+ return i;
+}
+
+template<size_t Size, size_t SizeWithPadding>
+void utf8_as_char32_test_compare_with_pad_bytes(const char utf8_src[Size])
+{
+ char utf8_src_with_pad[SizeWithPadding] = {0};
+
+ memcpy(utf8_src_with_pad, utf8_src, Size);
+
+ char32_t unicode_dst_a[Size], unicode_dst_b[Size];
+
+ memset(unicode_dst_a, 0xff, sizeof(unicode_dst_a));
+ const size_t index_a = utf8_as_char32(utf8_src, Size, unicode_dst_a);
+
+ /* Test with padded and un-padded size,
+ * to ensure that extra available space doesn't yield a different result. */
+ for (int pass = 0; pass < 2; pass++) {
+ memset(unicode_dst_b, 0xff, sizeof(unicode_dst_b));
+ const size_t index_b = utf8_as_char32(
+ utf8_src_with_pad, pass ? Size : SizeWithPadding, unicode_dst_b);
+
+ /* Check the resulting content matches. */
+ EXPECT_EQ_ARRAY(unicode_dst_a, unicode_dst_b, Size);
+ /* Check the index of the source strings match. */
+ EXPECT_EQ(index_a, index_b);
+ }
+}
+
+template<size_t Size> void utf8_as_char32_test_compare(const char utf8_src[Size])
+{
+ /* Note that 7 is a little arbitrary,
+ * chosen since it's the maximum length of multi-byte character + 1
+ * to account for any errors that read past null bytes. */
+ utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 1>(utf8_src);
+ utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 7>(utf8_src);
+}
+
+template<size_t Size> void utf8_as_char32_test_at_buffer_size()
+{
+ char utf8_src[Size];
+
+ /* Test uniform bytes, also with offsets ascending & descending. */
+ for (int i = 0; i <= 0xff; i++) {
+ memset(utf8_src, i, sizeof(utf8_src));
+ utf8_as_char32_test_compare<Size>(utf8_src);
+
+ /* Offset trailing bytes up and down in steps of 1, 2, 4 .. etc. */
+ if (Size > 1) {
+ for (int mul = 1; mul < 256; mul *= 2) {
+ for (int ofs = 1; ofs < (int)Size; ofs++) {
+ utf8_src[ofs] = (char)(i + (ofs * mul));
+ }
+ utf8_as_char32_test_compare<Size>(utf8_src);
+
+ for (int ofs = 1; ofs < (int)Size; ofs++) {
+ utf8_src[ofs] = (char)(i - (ofs * mul));
+ }
+ utf8_as_char32_test_compare<Size>(utf8_src);
+ }
+ }
+ }
+
+ /* Random bytes. */
+ RNG *rng = BLI_rng_new(1);
+ for (int i = 0; i < 256; i++) {
+ BLI_rng_get_char_n(rng, utf8_src, sizeof(utf8_src));
+ utf8_as_char32_test_compare<Size>(utf8_src);
+ }
+ BLI_rng_free(rng);
+}
+
+TEST(string, Utf8AsUnicodeStep)
+{
+
+ /* Run tests at different buffer sizes. */
+ utf8_as_char32_test_at_buffer_size<1>();
+ utf8_as_char32_test_at_buffer_size<2>();
+ utf8_as_char32_test_at_buffer_size<3>();
+ utf8_as_char32_test_at_buffer_size<4>();
+ utf8_as_char32_test_at_buffer_size<5>();
+ utf8_as_char32_test_at_buffer_size<6>();
+ utf8_as_char32_test_at_buffer_size<7>();
+ utf8_as_char32_test_at_buffer_size<8>();
+ utf8_as_char32_test_at_buffer_size<9>();
+ utf8_as_char32_test_at_buffer_size<10>();
+ utf8_as_char32_test_at_buffer_size<11>();
+ utf8_as_char32_test_at_buffer_size<12>();
+}
+
+/** \} */
diff --git a/source/blender/editors/space_text/text_ops.c b/source/blender/editors/space_text/text_ops.c
index b5fcadbefe8..f480f60a2b9 100644
--- a/source/blender/editors/space_text/text_ops.c
+++ b/source/blender/editors/space_text/text_ops.c
@@ -3424,25 +3424,26 @@ static int text_insert_exec(bContext *C, wmOperator *op)
SpaceText *st = CTX_wm_space_text(C);
Text *text = CTX_data_edit_text(C);
char *str;
+ int str_len;
bool done = false;
size_t i = 0;
uint code;
text_drawcache_tag_update(st, 0);
- str = RNA_string_get_alloc(op->ptr, "text", NULL, 0, NULL);
+ str = RNA_string_get_alloc(op->ptr, "text", NULL, 0, &str_len);
ED_text_undo_push_init(C);
if (st && st->overwrite) {
while (str[i]) {
- code = BLI_str_utf8_as_unicode_step(str, &i);
+ code = BLI_str_utf8_as_unicode_step(str, str_len, &i);
done |= txt_replace_char(text, code);
}
}
else {
while (str[i]) {
- code = BLI_str_utf8_as_unicode_step(str, &i);
+ code = BLI_str_utf8_as_unicode_step(str, str_len, &i);
done |= txt_add_char(text, code);
}
}