diff options
author | Campbell Barton <ideasman42@gmail.com> | 2021-12-09 12:01:44 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2021-12-09 12:01:44 +0300 |
commit | 9e365069afe156f33fadfad9705e1325f894cd54 (patch) | |
tree | 78373044d029feb51f987b45208e0c1a36958625 /source/blender/blenlib/BLI_string_utf8.h | |
parent | d8b42751625c915113b64f5a2d9c72f19f009fee (diff) |
Cleanup: move public doc-strings into headers for 'blenlib'
- Added space below non doc-string comments to make it clear
these aren't comments for the symbols directly below them.
- Use doxy sections for some headers.
- Minor improvements to doc-strings.
Ref T92709
Diffstat (limited to 'source/blender/blenlib/BLI_string_utf8.h')
-rw-r--r-- | source/blender/blenlib/BLI_string_utf8.h | 114 |
1 files changed, 109 insertions, 5 deletions
diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h index bf7547cc90b..1216af9216a 100644 --- a/source/blender/blenlib/BLI_string_utf8.h +++ b/source/blender/blenlib/BLI_string_utf8.h @@ -32,23 +32,84 @@ char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1, 2); +/** + * Find first UTF-8 invalid byte in given \a str, of \a length bytes. + * + * \return the offset of the first invalid byte. + */ ptrdiff_t BLI_str_utf8_invalid_byte(const char *str, size_t length) ATTR_NONNULL(1); +/** + * Remove any invalid UTF-8 byte (taking into account multi-bytes sequence of course). + * + * \return number of stripped bytes. + */ int BLI_str_utf8_invalid_strip(char *str, size_t length) ATTR_NONNULL(1); -/* warning, can return -1 on bad chars */ +/** + * \return The size (in bytes) of a single UTF-8 char. + * \warning Can return -1 on bad chars. + */ int BLI_str_utf8_size(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); +/** + * Use when we want to skip errors. + */ int BLI_str_utf8_size_safe(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); -/* copied from glib */ +/** + * \param p: a pointer to Unicode character encoded as UTF-8 + * + * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. + * If \a p does not point to a valid UTF-8 encoded character, results are + * undefined. If you are not sure that the bytes are complete + * valid Unicode characters, you should use g_utf8_get_char_validated() + * instead. + * + * Return value: the resulting character + */ unsigned int BLI_str_utf8_as_unicode(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); +/** + * UTF8 decoding that steps over the index (unless an error is encountered). + * + * \param p: The text to step over. + * \param p_len: The length of `p`. 
+ * \param index: Index of `p` to step over. + * \return the code-point or `(p + *index)` if there is a decoding error. + * + * \note Falls back to `LATIN1` for text drawing. + */ unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1, 3); +/** + * UTF8 decoding that steps over the index (unless an error is encountered). + * + * \param p: The text to step over. + * \param p_len: The length of `p`. + * \param index: Index of `p` to step over. + * \return the code-point or #BLI_UTF8_ERR if there is a decoding error. + * + * \note The behavior for clipped text (where `p_len` limits decoding trailing bytes) + * must have the same behavior as encountering a nil byte, + * so functions that only use the first part of a string have matching behavior to functions + * that null terminate the text. + */ unsigned int BLI_str_utf8_as_unicode_step_or_error( const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1, 3); size_t BLI_str_utf8_from_unicode_len(unsigned int c) ATTR_WARN_UNUSED_RESULT; +/** + * BLI_str_utf8_from_unicode: + * + * \param c: a Unicode character code + * \param outbuf: output buffer, must have at least `outbuf_len` bytes of space. + * If the length required by `c` exceeds `outbuf_len`, + * the available bytes will be zeroed and `outbuf_len` returned. + * + * Converts a single character to UTF-8. + * + * \return number of bytes written. + */ size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf, const size_t outbuf_len) ATTR_NONNULL(2); size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, @@ -57,19 +118,57 @@ size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, size_t BLI_str_utf32_as_utf8(char *__restrict dst, const char32_t *__restrict src, const size_t maxncpy) ATTR_NONNULL(1, 2); +/** + * \return The UTF-32 len in UTF-8. 
+ */ size_t BLI_str_utf32_as_utf8_len(const char32_t *src) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); +/** + * BLI_str_find_prev_char_utf8: + * \param str_start: pointer to the beginning of a UTF-8 encoded string + * \param p: pointer to some position within \a str_start + * + * Given a position \a p within a UTF-8 encoded string \a str_start, find the start + * of the previous UTF-8 character starting before \a p. Returns \a str_start if no + * UTF-8 characters are present in \a str_start before \a p. + * + * \a p does not have to be at the beginning of a UTF-8 character. No check + * is made to see if the character found is actually valid other than + * it starts with an appropriate byte. + * + * \return A pointer to the found character. + */ const char *BLI_str_find_prev_char_utf8(const char *p, const char *str_start) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1, 2); +/** + * \param p: a pointer to a position within a UTF-8 encoded string + * \param str_end: a pointer to the byte following the end of the string. + * + * Finds the start of the next UTF-8 character in the string after \a p. + * + * \a p does not have to be at the beginning of a UTF-8 character. No check + * is made to see if the character found is actually valid other than + * it starts with an appropriate byte. + * + * \return a pointer to the found character or a pointer to the null terminating character '\0'. + */ const char *BLI_str_find_next_char_utf8(const char *p, const char *str_end) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1, 2); +/** + * \return the `wchar_t` length in UTF-8. 
+ */ size_t BLI_wstrlen_utf8(const wchar_t *src) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT; size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes) ATTR_NONNULL(1, 2) ATTR_WARN_UNUSED_RESULT; size_t BLI_strlen_utf8(const char *strc) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT; size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes) ATTR_NONNULL(1, 3); +/** + * \param strc: the string to measure the length of. + * \param maxlen: the string length (in bytes) + * \return the unicode length (not in bytes!) + */ size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT; size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, @@ -79,10 +178,14 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst, const char *__restrict src, const size_t maxncpy) ATTR_NONNULL(1, 2); -/* count columns that character/string occupies, based on wcwidth.c */ +/** + * Count columns that character/string occupies (based on `wcwidth.c`). + */ int BLI_wcwidth(char32_t ucs) ATTR_WARN_UNUSED_RESULT; int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); -/* warning, can return -1 on bad chars */ +/** + * \warning can return -1 on bad chars. + */ int BLI_str_utf8_char_width(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); int BLI_str_utf8_char_width_safe(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); @@ -111,7 +214,8 @@ int BLI_str_utf8_offset_to_column(const char *str, int offset) ATTR_WARN_UNUSED_ int BLI_str_utf8_offset_from_column(const char *str, int column) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); -#define BLI_UTF8_MAX 6 /* mem */ +/** Size in bytes. */ +#define BLI_UTF8_MAX 6 #define BLI_UTF8_WIDTH_MAX 2 /* columns */ #define BLI_UTF8_ERR ((unsigned int)-1) |