diff options
author | Campbell Barton <ideasman42@gmail.com> | 2011-09-15 12:07:42 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2011-09-15 12:07:42 +0400 |
commit | 5ba213a424185e723a4de5833931847f0fe38c49 (patch) | |
tree | 13d8a0e8f36ff57434943a1a1d96d148586f4304 /source/blender | |
parent | afbb207a994d09750efab29dc56cfe4c2548a709 (diff) |
move utf8 string.c functions into their own file, also add python tip for printing operators.
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenlib/BLI_string.h | 8 | ||||
-rw-r--r-- | source/blender/blenlib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | source/blender/blenlib/intern/string.c | 116 | ||||
-rw-r--r-- | source/blender/blenlib/intern/string_utf8.c | 143 |
4 files changed, 150 insertions, 118 deletions
diff --git a/source/blender/blenlib/BLI_string.h b/source/blender/blenlib/BLI_string.h index 4a0c2ab9482..be77e18c24b 100644 --- a/source/blender/blenlib/BLI_string.h +++ b/source/blender/blenlib/BLI_string.h @@ -139,12 +139,14 @@ size_t BLI_strnlen(const char *str, size_t maxlen); void BLI_timestr(double _time, char *str); /* time var is global */ -int BLI_utf8_invalid_byte(const char *str, int length); -int BLI_utf8_invalid_strip(char *str, int length); - void BLI_ascii_strtolower(char *str, int len); void BLI_ascii_strtoupper(char *str, int len); + +/* string_utf8.c - may move these into their own header some day - campbell */ +int BLI_utf8_invalid_byte(const char *str, int length); +int BLI_utf8_invalid_strip(char *str, int length); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index b4fc983008c..aa822731474 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -80,6 +80,7 @@ set(SRC intern/scanfill.c intern/storage.c intern/string.c + intern/string_utf8.c intern/threads.c intern/time.c intern/uvproject.c diff --git a/source/blender/blenlib/intern/string.c b/source/blender/blenlib/intern/string.c index ae5fa40f3b9..8315161aeda 100644 --- a/source/blender/blenlib/intern/string.c +++ b/source/blender/blenlib/intern/string.c @@ -1,8 +1,4 @@ -/* util.c - * - * various string, file, list operations. - * - * +/* * $Id$ * * ***** BEGIN GPL LICENSE BLOCK ***** @@ -399,116 +395,6 @@ size_t BLI_strnlen(const char *str, size_t maxlen) return end ? (size_t) (end - str) : maxlen; } -/* from libswish3, originally called u8_isvalid(), - * modified to return the index of the bad character (byte index not utf). - * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */ - -/* based on the valid_utf8 routine from the PCRE library by Philip Hazel - - length is in bytes, since without knowing whether the string is valid - it's hard to know how many characters there are! */ - -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -int BLI_utf8_invalid_byte(const char *str, int length) -{ - const unsigned char *p, *pend = (unsigned char*)str + length; - unsigned char c; - int ab; - - for (p = (unsigned char*)str; p < pend; p++) { - c = *p; - if (c < 128) - continue; - if ((c & 0xc0) != 0xc0) - goto utf8_error; - ab = trailingBytesForUTF8[c]; - if (length < ab) - goto utf8_error; - length -= ab; - - p++; - /* Check top bits in the second byte */ - if ((*p & 0xc0) != 0x80) - goto utf8_error; - - /* Check for overlong sequences for each different length */ - switch (ab) { - /* Check for xx00 000x */ - case 1: - if ((c & 0x3e) == 0) goto utf8_error; - continue; /* We know there aren't any more bytes to check */ - - /* Check for 1110 0000, xx0x xxxx */ - case 2: - if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error; - break; - - /* Check for 1111 0000, xx00 xxxx */ - case 3: - if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error; - break; - - /* Check for 1111 1000, xx00 0xxx */ - case 4: - if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error; - break; - - /* Check for leading 0xfe or 0xff, - and then for 1111 1100, xx00 00xx */ - case 5: - if (c == 0xfe || c == 0xff || - (c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error; - break; - } - - /* Check for valid bytes after the 2nd, if any; all must start 10 */ - while (--ab > 0) { - if ((*(p+1) & 0xc0) != 0x80) goto utf8_error; - p++; /* do this after so we get usable offset - campbell */ - } - } - - return -1; - -utf8_error: - - return (int)((char *)p - (char *)str) - 1; -} - -int BLI_utf8_invalid_strip(char *str, int length) -{ - int bad_char, tot= 0; - - while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) { - str += bad_char; - length -= bad_char; - - if(length == 0) { - /* last character bad, strip it */ - *str= '\0'; - tot++; - break; - } - else { - /* strip, keep looking */ - memmove(str, str + 1, length); - tot++; - } - } - - return tot; -} - void BLI_ascii_strtolower(char *str, int len) { int i; diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c new file mode 100644 index 00000000000..8f7e4518e03 --- /dev/null +++ b/source/blender/blenlib/intern/string_utf8.c @@ -0,0 +1,143 @@ +/* + * $Id: + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2011 Blender Foundation. + * All rights reserved. + * + * Contributor(s): Campbell Barton. + * + * ***** END GPL LICENSE BLOCK ***** + * + */ + + /** \file blender/blenlib/intern/string_utf8.c + * \ingroup bli + */ + +#include <string.h> + +/* from libswish3, originally called u8_isvalid(), + * modified to return the index of the bad character (byte index not utf). + * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */ + +/* based on the valid_utf8 routine from the PCRE library by Philip Hazel + + length is in bytes, since without knowing whether the string is valid + it's hard to know how many characters there are! */ + +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +int BLI_utf8_invalid_byte(const char *str, int length) +{ + const unsigned char *p, *pend = (unsigned char*)str + length; + unsigned char c; + int ab; + + for (p = (unsigned char*)str; p < pend; p++) { + c = *p; + if (c < 128) + continue; + if ((c & 0xc0) != 0xc0) + goto utf8_error; + ab = trailingBytesForUTF8[c]; + if (length < ab) + goto utf8_error; + length -= ab; + + p++; + /* Check top bits in the second byte */ + if ((*p & 0xc0) != 0x80) + goto utf8_error; + + /* Check for overlong sequences for each different length */ + switch (ab) { + /* Check for xx00 000x */ + case 1: + if ((c & 0x3e) == 0) goto utf8_error; + continue; /* We know there aren't any more bytes to check */ + + /* Check for 1110 0000, xx0x xxxx */ + case 2: + if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error; + break; + + /* Check for 1111 0000, xx00 xxxx */ + case 3: + if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error; + break; + + /* Check for 1111 1000, xx00 0xxx */ + case 4: + if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error; + break; + + /* Check for leading 0xfe or 0xff, + and then for 1111 1100, xx00 00xx */ + case 5: + if (c == 0xfe || c == 0xff || + (c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error; + break; + } + + /* Check for valid bytes after the 2nd, if any; all must start 10 */ + while (--ab > 0) { + if ((*(p+1) & 0xc0) != 0x80) goto utf8_error; + p++; /* do this after so we get usable offset - campbell */ + } + } + + return -1; + +utf8_error: + + return (int)((char *)p - (char *)str) - 1; +} + +int BLI_utf8_invalid_strip(char *str, int length) +{ + int bad_char, tot= 0; + + while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) { + str += bad_char; + length -= bad_char; + + if(length == 0) { + /* last character bad, strip it */ + *str= '\0'; + tot++; + break; + } + else { + /* strip, keep looking */ + memmove(str, str + 1, length); + tot++; + } + } + + return tot; +} |