From 5ba213a424185e723a4de5833931847f0fe38c49 Mon Sep 17 00:00:00 2001
From: Campbell Barton <ideasman42@gmail.com>
Date: Thu, 15 Sep 2011 08:07:42 +0000
Subject: move utf8 string.c functions into their own file, also add python tip
 for printing operators.

---
 source/blender/blenlib/intern/string.c | 116 +--------------------------------
 1 file changed, 1 insertion(+), 115 deletions(-)

(limited to 'source/blender/blenlib/intern/string.c')

diff --git a/source/blender/blenlib/intern/string.c b/source/blender/blenlib/intern/string.c
index ae5fa40f3b9..8315161aeda 100644
--- a/source/blender/blenlib/intern/string.c
+++ b/source/blender/blenlib/intern/string.c
@@ -1,8 +1,4 @@
-/* util.c
- *
- * various string, file, list operations.
- *
- *
+/*
  * $Id$
  *
  * ***** BEGIN GPL LICENSE BLOCK *****
@@ -399,116 +395,6 @@ size_t BLI_strnlen(const char *str, size_t maxlen)
 	return end ? (size_t) (end - str) : maxlen;
 }
 
-/* from libswish3, originally called u8_isvalid(),
- * modified to return the index of the bad character (byte index not utf).
- * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
-
-/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
-
-   length is in bytes, since without knowing whether the string is valid
-   it's hard to know how many characters there are! */
-
-static const char trailingBytesForUTF8[256] = {
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-int BLI_utf8_invalid_byte(const char *str, int length)
-{
-	const unsigned char *p, *pend = (unsigned char*)str + length;
-	unsigned char c;
-	int ab;
-
-	for (p = (unsigned char*)str; p < pend; p++) {
-		c = *p;
-		if (c < 128)
-			continue;
-		if ((c & 0xc0) != 0xc0)
-			goto utf8_error;
-		ab = trailingBytesForUTF8[c];
-		if (length < ab)
-			goto utf8_error;
-		length -= ab;
-
-		p++;
-		/* Check top bits in the second byte */
-		if ((*p & 0xc0) != 0x80)
-			goto utf8_error;
-
-		/* Check for overlong sequences for each different length */
-		switch (ab) {
-			/* Check for xx00 000x */
-		case 1:
-			if ((c & 0x3e) == 0) goto utf8_error;
-			continue;   /* We know there aren't any more bytes to check */
-
-			/* Check for 1110 0000, xx0x xxxx */
-		case 2:
-			if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
-			break;
-
-			/* Check for 1111 0000, xx00 xxxx */
-		case 3:
-			if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
-			break;
-
-			/* Check for 1111 1000, xx00 0xxx */
-		case 4:
-			if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
-			break;
-
-			/* Check for leading 0xfe or 0xff,
-			   and then for 1111 1100, xx00 00xx */
-		case 5:
-			if (c == 0xfe || c == 0xff ||
-				(c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
-			break;
-		}
-
-		/* Check for valid bytes after the 2nd, if any; all must start 10 */
-		while (--ab > 0) {
-			if ((*(p+1) & 0xc0) != 0x80) goto utf8_error;
-			p++; /* do this after so we get usable offset - campbell */
-		}
-	}
-
-	return -1;
-
-utf8_error:
-
-	return (int)((char *)p - (char *)str) - 1;
-}
-
-int BLI_utf8_invalid_strip(char *str, int length)
-{
-	int bad_char, tot= 0;
-
-	while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) {
-		str += bad_char;
-		length -= bad_char;
-
-		if(length == 0) {
-			/* last character bad, strip it */
-			*str= '\0';
-			tot++;
-			break;
-		}
-		else {
-			/* strip, keep looking */
-			memmove(str, str + 1, length);
-			tot++;
-		}
-	}
-
-	return tot;
-}
-
 void BLI_ascii_strtolower(char *str, int len)
 {
 	int i;
-- 
cgit v1.2.3