move utf8 string.c functions into their own file, also add python tip for printing operators.

author: Campbell Barton <ideasman42@gmail.com> 2011-09-15 12:07:42 +0400
committer: Campbell Barton <ideasman42@gmail.com> 2011-09-15 12:07:42 +0400
commit: 5ba213a424185e723a4de5833931847f0fe38c49 (patch)
tree: 13d8a0e8f36ff57434943a1a1d96d148586f4304 /source/blender
parent: afbb207a994d09750efab29dc56cfe4c2548a709 (diff)
4 files changed, 150 insertions, 118 deletions
diff --git a/source/blender/blenlib/BLI_string.h b/source/blender/blenlib/BLI_string.h
index 4a0c2ab9482..be77e18c24b 100644
--- a/source/blender/blenlib/BLI_string.h
+++ b/source/blender/blenlib/BLI_string.h
@@ -139,12 +139,14 @@ size_t BLI_strnlen(const char *str, size_t maxlen);
 
 void BLI_timestr(double _time, char *str); /* time var is global */
 
-int BLI_utf8_invalid_byte(const char *str, int length);
-int BLI_utf8_invalid_strip(char *str, int length);
-
 void BLI_ascii_strtolower(char *str, int len);
 void BLI_ascii_strtoupper(char *str, int len);
 
+
+/* string_utf8.c - may move these into their own header some day - campbell */
+int BLI_utf8_invalid_byte(const char *str, int length);
+int BLI_utf8_invalid_strip(char *str, int length);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index b4fc983008c..aa822731474 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -80,6 +80,7 @@ set(SRC
 	intern/scanfill.c
 	intern/storage.c
 	intern/string.c
+	intern/string_utf8.c
 	intern/threads.c
 	intern/time.c
 	intern/uvproject.c
diff --git a/source/blender/blenlib/intern/string.c b/source/blender/blenlib/intern/string.c
index ae5fa40f3b9..8315161aeda 100644
--- a/source/blender/blenlib/intern/string.c
+++ b/source/blender/blenlib/intern/string.c
@@ -1,8 +1,4 @@
-/* util.c
- *
- * various string, file, list operations.
- *
- *
+/*
  * $Id$
  *
  * ***** BEGIN GPL LICENSE BLOCK *****
@@ -399,116 +395,6 @@ size_t BLI_strnlen(const char *str, size_t maxlen)
 	return end ? (size_t) (end - str) : maxlen;
 }
 
-/* from libswish3, originally called u8_isvalid(),
- * modified to return the index of the bad character (byte index not utf).
- * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
-
-/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
-
-   length is in bytes, since without knowing whether the string is valid
-   it's hard to know how many characters there are! */
-
-static const char trailingBytesForUTF8[256] = {
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-int BLI_utf8_invalid_byte(const char *str, int length)
-{
-	const unsigned char *p, *pend = (unsigned char*)str + length;
-	unsigned char c;
-	int ab;
-
-	for (p = (unsigned char*)str; p < pend; p++) {
-		c = *p;
-		if (c < 128)
-			continue;
-		if ((c & 0xc0) != 0xc0)
-			goto utf8_error;
-		ab = trailingBytesForUTF8[c];
-		if (length < ab)
-			goto utf8_error;
-		length -= ab;
-
-		p++;
-		/* Check top bits in the second byte */
-		if ((*p & 0xc0) != 0x80)
-			goto utf8_error;
-
-		/* Check for overlong sequences for each different length */
-		switch (ab) {
-			/* Check for xx00 000x */
-		case 1:
-			if ((c & 0x3e) == 0) goto utf8_error;
-			continue;   /* We know there aren't any more bytes to check */
-
-			/* Check for 1110 0000, xx0x xxxx */
-		case 2:
-			if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
-			break;
-
-			/* Check for 1111 0000, xx00 xxxx */
-		case 3:
-			if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
-			break;
-
-			/* Check for 1111 1000, xx00 0xxx */
-		case 4:
-			if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
-			break;
-
-			/* Check for leading 0xfe or 0xff,
-			   and then for 1111 1100, xx00 00xx */
-		case 5:
-			if (c == 0xfe || c == 0xff ||
-				(c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
-			break;
-		}
-
-		/* Check for valid bytes after the 2nd, if any; all must start 10 */
-		while (--ab > 0) {
-			if ((*(p+1) & 0xc0) != 0x80) goto utf8_error;
-			p++; /* do this after so we get usable offset - campbell */
-		}
-	}
-
-	return -1;
-
-utf8_error:
-
-	return (int)((char *)p - (char *)str) - 1;
-}
-
-int BLI_utf8_invalid_strip(char *str, int length)
-{
-	int bad_char, tot= 0;
-
-	while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) {
-		str += bad_char;
-		length -= bad_char;
-
-		if(length == 0) {
-			/* last character bad, strip it */
-			*str= '\0';
-			tot++;
-			break;
-		}
-		else {
-			/* strip, keep looking */
-			memmove(str, str + 1, length);
-			tot++;
-		}
-	}
-
-	return tot;
-}
-
 void BLI_ascii_strtolower(char *str, int len)
 {
 	int i;
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
new file mode 100644
index 00000000000..8f7e4518e03
--- /dev/null
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -0,0 +1,143 @@
+/*
+ * $Id$
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2011 Blender Foundation.
+ * All rights reserved.
+ *
+ * Contributor(s): Campbell Barton.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ * 
+ */
+ 
+ /** \file blender/blenlib/intern/string_utf8.c
+ *  \ingroup bli
+ */
+ 
+#include <string.h>
+
+/* from libswish3, originally called u8_isvalid(),
+ * modified to return the index of the bad character (byte index not utf).
+ * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
+
+/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
+
+   length is in bytes, since without knowing whether the string is valid
+   it's hard to know how many characters there are! */
+
+static const char trailingBytesForUTF8[256] = { /* indexed by byte value, 32 entries per row */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x00 - 0x1f */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20 - 0x3f */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 - 0x5f */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 - 0x7f */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 - 0x9f */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 - 0xbf */
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0 - 0xdf */
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 /* 0xe0 - 0xff: 16x2, 8x3, 4x4, 4x5 */
+};
+
+/**
+ * Find the first invalid UTF-8 sequence in the first \a length bytes of \a str.
+ *
+ * \param str bytes to check, nothing at or beyond str[length] is read.
+ * \param length size of \a str in bytes.
+ * \return byte offset (not character index) of the first byte of the first
+ * malformed sequence, or -1 when no malformed sequence is found.
+ */
+int BLI_utf8_invalid_byte(const char *str, int length)
+{
+	const unsigned char *p, *pend = (const unsigned char *)str + length;
+	const unsigned char *perr = (const unsigned char *)str;
+	unsigned char c;
+	int ab;
+
+	for (p = (const unsigned char *)str; p < pend; p++) {
+		c = *p;
+		/* the error offset is always the first byte of the current sequence */
+		perr = p;
+		if (c < 128)
+			continue;
+		if ((c & 0xc0) != 0xc0)
+			goto utf8_error; /* continuation byte without a lead byte */
+		ab = trailingBytesForUTF8[c];
+		/* truncated sequence: all trailing bytes must lie before 'pend' */
+		if ((pend - p) <= ab)
+			goto utf8_error;
+
+		p++;
+		/* Check top bits in the second byte */
+		if ((*p & 0xc0) != 0x80)
+			goto utf8_error;
+
+		/* Check for overlong sequences for each different length */
+		switch (ab) {
+		case 1: /* Check for xx00 000x */
+			if ((c & 0x3e) == 0) goto utf8_error;
+			continue;   /* We know there aren't any more bytes to check */
+		case 2: /* Check for 1110 0000, xx0x xxxx */
+			if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
+			break;
+		case 3: /* Check for 1111 0000, xx00 xxxx */
+			if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
+			break;
+		case 4: /* Check for 1111 1000, xx00 0xxx */
+			if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
+			break;
+		case 5: /* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
+			if (c == 0xfe || c == 0xff ||
+				(c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
+			break;
+		}
+
+		/* Check for valid bytes after the 2nd, if any; all must start 10 */
+		while (--ab > 0) {
+			p++;
+			if ((*p & 0xc0) != 0x80) goto utf8_error;
+		}
+	}
+
+	return -1;
+
+utf8_error:
+	return (int)(perr - (const unsigned char *)str);
+}
+
+/* Remove each byte BLI_utf8_invalid_byte() reports within the first 'length' bytes
+ * of 'str', closing the gap in place; returns the number of bytes removed. */
+int BLI_utf8_invalid_strip(char *str, int length)
+{
+	int offset, removed = 0;
+
+	for (;;) {
+		offset = BLI_utf8_invalid_byte(str, length);
+		if (offset == -1)
+			break;
+		/* resume from the reported byte, 'length' is what remains from there */
+		str += offset;
+		length -= offset;
+		removed++;
+		if (length == 0) {
+			*str = '\0'; /* nothing left after it, terminate and stop */
+			break;
+		}
+		/* copies 'length' bytes, so the byte at str[length] moves down too */
+		memmove(str, str + 1, length);
+	}
+	return removed;
+}
author	Campbell Barton <ideasman42@gmail.com>	2011-09-15 12:07:42 +0400
committer	Campbell Barton <ideasman42@gmail.com>	2011-09-15 12:07:42 +0400
commit	5ba213a424185e723a4de5833931847f0fe38c49 (patch)
tree	13d8a0e8f36ff57434943a1a1d96d148586f4304 /source/blender
parent	afbb207a994d09750efab29dc56cfe4c2548a709 (diff)