Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2021-08-25 08:18:57 +0300
committerCampbell Barton <ideasman42@gmail.com>2021-08-25 08:27:18 +0300
commitbe906f44c6bb51eb492ecb90dbc1e8e0bc01d1ec (patch)
tree063248784f46dba55c5173ddc28c08e1c3dce9e4 /source/blender/blenlib/intern/string_utf8.c
parent70f890b510562864e8a37d49afb4891bf083bf84 (diff)
BLI_string_utf8: simplify utf8 stepping logic
There were multiple utf8 functions which treated errors slightly differently. Split BLI_str_utf8_as_unicode_step into two functions. - BLI_str_utf8_as_unicode_step_or_error returns an error value when decoding fails and doesn't step. - BLI_str_utf8_as_unicode_step always steps forward at least one byte, returning the byte value without decoding when decoding fails (needed to display some latin1 file-paths). Font drawing uses BLI_str_utf8_as_unicode_step and no longer checks for error values.
Diffstat (limited to 'source/blender/blenlib/intern/string_utf8.c')
-rw-r--r--source/blender/blenlib/intern/string_utf8.c75
1 files changed, 34 insertions, 41 deletions
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index dbde5221d7e..06fd3168c24 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -581,73 +581,66 @@ uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__r
}
/**
- * Another variant that steps over the index.
+ * UTF8 decoding that steps over the index (unless an error is encountered).
*
* \param p: The text to step over.
* \param p_len: The length of `p`.
* \param index: Index of `p` to step over.
- *
- * \note currently this also falls back to latin1 for text drawing.
+ * \return the code-point or #BLI_UTF8_ERR if there is a decoding error.
*
* \note The behavior for clipped text (where `p_len` limits decoding trailing bytes)
* must match the behavior of encountering a nil byte,
* so functions that only use the first part of a string have matching behavior to functions
* that null terminate the text.
*/
-uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
- const size_t p_len,
- size_t *__restrict index)
+uint BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p,
+ const size_t p_len,
+ size_t *__restrict index)
{
int i, len;
uint mask = 0;
uint result;
- const char c = p[*index];
+ const unsigned char c = (unsigned char)*(p += *index);
BLI_assert(*index < p_len);
BLI_assert(c != '\0');
UTF8_COMPUTE(c, mask, len, -1);
- if (UNLIKELY(len == -1)) {
- const char *p_next = BLI_str_find_next_char_utf8(p + *index, p + p_len);
- /* #BLI_str_find_next_char_utf8 ensures the nil byte will terminate.
- * so there is no chance this sets the index past the nil byte (assert this is the case). */
- BLI_assert(p_next || (memchr(p + *index, '\0', p_len - *index) == NULL));
- len = (int)((p_next ? (size_t)(p_next - p) : p_len) - *index);
- result = BLI_UTF8_ERR;
- }
- else if (UNLIKELY(*index + (size_t)len > p_len)) {
- /* A multi-byte character reads past the buffer bounds,
- * match the behavior of encountering an byte with invalid encoding below. */
- len = 1;
- result = (uint)c;
+ if (UNLIKELY(len == -1) || (*index + (size_t)len > p_len)) {
+ return BLI_UTF8_ERR;
}
- else {
- /* This is tricky since there are a few ways we can bail out of bad unicode
- * values, 3 possible solutions. */
- p += *index;
-#if 0
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
-#elif 1
- /* WARNING: this is NOT part of glib, or supported by similar functions.
- * this is added for text drawing because some filepaths can have latin1
- * characters */
- UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
- if (result == BLI_UTF8_ERR) {
- len = 1;
- result = (uint)c;
- }
- /* end warning! */
-#else
- /* Without a fallback like '?', text drawing will stop on this value. */
- UTF8_GET(result, p, i, mask, len, '?');
-#endif
+ UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+ if (UNLIKELY(result == BLI_UTF8_ERR)) {
+ return BLI_UTF8_ERR;
}
-
*index += (size_t)len;
BLI_assert(*index <= p_len);
return result;
}
+/**
+ * UTF8 decoding that always steps over the index (by a single byte when a decoding error is encountered).
+ *
+ * \param p: The text to step over.
+ * \param p_len: The length of `p`.
+ * \param index: Index of `p` to step over.
+ * \return the code-point, or the byte value at `(p + *index)` if there is a decoding error.
+ *
+ * \note Falls back to `LATIN1` for text drawing.
+ */
+uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
+ const size_t p_len,
+ size_t *__restrict index)
+{
+ uint result = BLI_str_utf8_as_unicode_step_or_error(p, p_len, index);
+ if (UNLIKELY(result == BLI_UTF8_ERR)) {
+ result = (uint)p[*index];
+ *index += 1;
+ }
+ BLI_assert(*index <= p_len);
+ return result;
+}
+
/* was g_unichar_to_utf8 */
/**
* BLI_str_utf8_from_unicode: