1 files changed, 375 insertions, 0 deletions
diff --git a/src/lua_tstring.c b/src/lua_tstring.c
new file mode 100644
index 0000000..f1ede73
--- /dev/null
+++ b/src/lua_tstring.c
@@ -0,0 +1,375 @@
+#include <limits.h>
+#include <string.h>
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
+#include "luamacro.h"
+
+/*
+ * Decode `len` bytes of UTF-8 at `s` (1- to 3-byte sequences) into wchar_t
+ * units and push them onto the Lua stack as one raw-byte string.  A lead
+ * byte whose trail bytes are missing or lie outside [s, s+len), a stray trail
+ * byte, and any byte >= 0xF0 each become one unit holding the byte value.
+ * sizeof(wchar_t)-1 zero bytes are appended: with the '\0' Lua keeps after
+ * every string that makes a complete wide NUL, whatever the width of wchar_t.
+ * Returns the number of wchar_t units produced (terminator not counted).
+ */
+size_t lua_utf8towcsZ(lua_State *L, const char *s, int len){
+	static const wchar_t wzero = 0;
+	const unsigned char *p = (const unsigned char *)s;
+	const unsigned char *end = p + len;
+	size_t count = 0;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	while (p < end) {
+		size_t left = (size_t)(end - p);	/* bytes still available, >= 1 */
+		size_t used = 1;			/* input bytes consumed by this unit */
+		wchar_t unit = p[0];			/* default: the byte represents itself */
+		if (p[0] >= 0xC0 && p[0] < 0xE0) {
+			/* Two-byte sequence: needs one trail byte inside the input. */
+			if (left >= 2 && (p[1] & 0xC0) == 0x80) {
+				unit = (wchar_t)(((p[0] & 0x1F) << 6) | (p[1] & 0x3F));
+				used = 2;
+			}
+		} else if (p[0] >= 0xE0 && p[0] < 0xF0) {
+			/* Three-byte sequence: needs two trail bytes inside the input. */
+			if (left >= 3 && (p[1] & 0xC0) == 0x80 && (p[2] & 0xC0) == 0x80) {
+				unit = (wchar_t)(((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F));
+				used = 3;
+			}
+		}
+		luaL_addlstring(&b, (const char *)&unit, sizeof(wchar_t));
+		count++;
+		p += used;
+	}
+	/* Lua supplies the last zero byte of the wide terminator itself. */
+	luaL_addlstring(&b, (const char *)&wzero, sizeof(wchar_t) - 1);
+	luaL_pushresult(&b);
+	return count;
+}
+
+/*
+ * Decode `len` bytes of UTF-8 at `s` (1- to 3-byte sequences) into wchar_t
+ * units and push them onto the Lua stack as one raw-byte string, with no
+ * wide terminator appended.
+ * A lead byte whose trail bytes are missing or lie outside [s, s+len), a
+ * stray trail byte, and any byte >= 0xF0 each become one unit holding the
+ * byte value.  Trail bytes are only examined after checking they are inside
+ * the input, so the buffer does not have to be NUL-terminated.
+ * Returns the number of wchar_t units produced.
+ */
+size_t lua_utf8towcs(lua_State *L, const char *s, int len){
+	const unsigned char *p = (const unsigned char *)s;
+	const unsigned char *end = p + len;
+	size_t count = 0;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	while (p < end) {
+		size_t left = (size_t)(end - p);	/* bytes still available, >= 1 */
+		size_t used = 1;			/* input bytes consumed by this unit */
+		wchar_t unit = p[0];			/* default: the byte represents itself */
+		if (p[0] >= 0xC0 && p[0] < 0xE0) {
+			/* Two-byte sequence: needs one trail byte inside the input. */
+			if (left >= 2 && (p[1] & 0xC0) == 0x80) {
+				unit = (wchar_t)(((p[0] & 0x1F) << 6) | (p[1] & 0x3F));
+				used = 2;
+			}
+		} else if (p[0] >= 0xE0 && p[0] < 0xF0) {
+			/* Three-byte sequence: needs two trail bytes inside the input. */
+			if (left >= 3 && (p[1] & 0xC0) == 0x80 && (p[2] & 0xC0) == 0x80) {
+				unit = (wchar_t)(((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F));
+				used = 3;
+			}
+		}
+		/* The unit is appended as the raw bytes of a wchar_t object. */
+		luaL_addlstring(&b, (const char *)&unit, sizeof(wchar_t));
+		count++;
+		p += used;
+	}
+	luaL_pushresult(&b);
+	return count;
+}
+
+//  UTF-8 encoding constants and bit-field helpers.
+#define ASCII             0x007f  // largest value encoded as a single byte
+#define UTF8_2_MAX        0x07ff  // largest value that fits a 2-byte sequence (32 * 64 = 2048 values)
+#define UTF8_1ST_OF_2     0xc0    // 110x xxxx
+#define UTF8_1ST_OF_3     0xe0    // 1110 xxxx
+#define UTF8_TRAIL        0x80    // 10xx xxxx
+#define HIGER_6_BIT(u)    ((u) >> 12)  // bits 12 and above of u ("HIGER" is presumably a typo for "HIGHER"); not referenced in this file
+#define MIDDLE_6_BIT(u)   (((u) & 0x0fc0) >> 6)  // bits 6..11 of u
+#define LOWER_6_BIT(u)    ((u) & 0x003f)  // bits 0..5 of u
+
+#ifndef HIBYTE  // fallbacks, used only when HIBYTE is not already defined
+#define BYTE unsigned char
+#define HIBYTE(w)   ((BYTE) (((wchar_t) (w) >> 8) & 0xFF))  // bits 8..15 of w after conversion to wchar_t
+#define LOBYTE(w)   ((BYTE) (w))  // w converted to unsigned char
+#endif
+
+/*
+ * Encode the `cchSrc` wchar_t units at `s` as UTF-8 and push the bytes as one
+ * Lua string: 1 byte up to 0x7F, 2 up to 0x7FF, 3 up to 0xFFFF, 4 above that
+ * (possible only with a wchar_t wider than 16 bits; such values used to spill
+ * into the 3-byte lead).  Surrogates are not paired.  Returns bytes written.
+ */
+size_t lua_wcstoutf8(lua_State *L, const wchar_t *s, size_t cchSrc){
+	const wchar_t *end = s + cchSrc;
+	size_t nbytes = 0;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	for (; s != end; s++) {
+		unsigned long cp = (unsigned long)*s;	/* widened so every range test is valid for a 16-bit wchar_t too */
+		if (cp <= ASCII) {
+			luaL_addchar(&b, (char)cp);
+			nbytes += 1;
+		} else if (cp <= UTF8_2_MAX) {
+			luaL_addchar(&b, (char)(UTF8_1ST_OF_2 | (cp >> 6)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | LOWER_6_BIT(cp)));
+			nbytes += 2;
+		} else if (cp <= 0xFFFFUL) {
+			luaL_addchar(&b, (char)(UTF8_1ST_OF_3 | (cp >> 12)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | MIDDLE_6_BIT(cp)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | LOWER_6_BIT(cp)));
+			nbytes += 3;
+		} else {
+			luaL_addchar(&b, (char)(0xF0          | ((cp >> 18) & 0x07)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | ((cp >> 12) & 0x3F)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | MIDDLE_6_BIT(cp)));
+			luaL_addchar(&b, (char)(UTF8_TRAIL    | LOWER_6_BIT(cp)));
+			nbytes += 4;
+		}
+	}
+	luaL_pushresult(&b);
+	return nbytes;
+}
+
+/*
+ * Widen each of the `len` bytes at `s` to a wchar_t and push the units as a
+ * raw-byte Lua string, followed by sizeof(wchar_t)-1 zero bytes: with the '\0'
+ * Lua keeps after every string that is a full wide NUL.  Returns the unit count.
+ */
+size_t lua_chartowcsZ(lua_State *L, const char *s, int len){
+	static const wchar_t wzero = 0;
+	int i = 0;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	for (; i < len; i++) {
+		wchar_t unit = (unsigned char)s[i];	/* unsigned: bytes >= 0x80 must not sign-extend */
+		luaL_addlstring(&b, (const char *)&unit, sizeof(wchar_t));
+	}
+	luaL_addlstring(&b, (const char *)&wzero, sizeof(wchar_t) - 1);
+	luaL_pushresult(&b);
+	return (size_t)i;
+}
+/*
+ * Encode one code point as UTF-8 into `d` (room for 4 bytes, no NUL written)
+ * and return the byte count.  Values above 0xFFFF get a proper 4-byte form;
+ * values that are not code points (> 0x10FFFF) are written as U+FFFD.
+ */
+int wc2utf8(char *d, wchar_t ch){
+	static const unsigned char lead[5] = { 0, 0x00, 0xC0, 0xE0, 0xF0 };
+	unsigned long cp = (unsigned long)ch;
+	int n, i;
+	if (cp > 0x10FFFFUL) cp = 0xFFFDUL;	/* also catches a negative signed wchar_t */
+	n = cp < 0x80 ? 1 : cp < 0x800 ? 2 : cp < 0x10000UL ? 3 : 4;
+	/* Trail bytes go in back to front, six payload bits each. */
+	for (i = n - 1; i > 0; i--, cp >>= 6)
+		d[i] = (char)(0x80 | (cp & 0x3F));
+	d[0] = (char)(lead[n] | cp);	/* what is left of cp fits beside the marker bits */
+	return n;
+}
+// Push onto the Lua stack the UTF-8 bytes that encode the single wchar_t value `ch`.
+void lua_pushutf8_from_wchar(lua_State *L, wchar_t ch){
+	char utf8[4] = {0};	/* wc2utf8 fills this and reports how many bytes it wrote */
+	lua_pushlstring(L, utf8, (size_t)wc2utf8(utf8, ch));
+}
+void lua_addutf8_from_wchar(luaL_Buffer * pB, wchar_t ch){ // append `ch` to *pB as UTF-8
+	char *area = luaL_prepbuffer(pB);	/* may flush and reset pB, so it must finish before luaL_addsize reads pB */
+	luaL_addsize(pB, wc2utf8(area, ch));
+}
+/*
+ * Read argument `i` as a string holding exactly one character and return its
+ * value: "" gives 0, one byte gives that byte (0..255), and a well-formed
+ * 2- or 3-byte UTF-8 sequence gives the decoded value.  Anything else raises
+ * a Lua argument error.  Bytes are examined as unsigned char: with a signed
+ * plain char the lead-byte range tests (>= 0xC0) could never succeed.
+ */
+wchar_t lua_checkwchar_from_utf8(lua_State *L, int i){
+	const unsigned char *u = (const unsigned char *)luaL_checkstring(L, i);
+	if (u[0] == 0 || u[1] == 0)
+		return u[0];	/* empty string or a single byte */
+	/* From here u[1] != 0, so u[2] is readable; u[3] is read only after u[2] != 0. */
+	if (u[0] >= 0xC0 && u[0] < 0xE0 && (u[1] & 0xC0) == 0x80 && u[2] == 0)
+		return (wchar_t)(((u[0] & 0x1F) << 6) | (u[1] & 0x3F));
+	if (u[0] >= 0xE0 && u[0] < 0xF0 && (u[1] & 0xC0) == 0x80 && (u[2] & 0xC0) == 0x80 && u[3] == 0)
+		return (wchar_t)(((u[0] & 0x0F) << 12) | ((u[1] & 0x3F) << 6) | (u[2] & 0x3F));
+	luaL_argerror(L, i, "character expected");
+	return 0;	/* not reached: luaL_argerror does not return */
+}
+/*
+ * Convert the UTF-8 string at stack index `narg` to a wide string in place (the
+ * result of lua_utf8towcsZ replaces that slot); NULL if no string could be read.
+ */
+const wchar_t *lua_tolwcs_from_utf8(lua_State *L, int narg, size_t* l){
+	const char *utf8;
+	size_t n = 0;
+	narg = lua_absindex(L, narg);
+#if LUA_VERSION_NUM >= 501
+	utf8 = lua_tolstring(L, narg, &n);
+#else
+	utf8 = lua_tostring(L, narg);
+	n = lua_strlen(L, narg);
+#endif
+	if (utf8 == NULL) return NULL;
+	n = (size_t)lua_utf8towcsZ(L, utf8, (int)n);
+	if (l != NULL) *l = n;	/* count returned by lua_utf8towcsZ, stored only on request */
+	lua_replace(L, narg);
+	return (const wchar_t *)lua_tostring(L, narg);
+}
+// Checked variant: argument `narg` must be a string (luaL_checklstring raises otherwise).
+// Its UTF-8 bytes are converted by lua_utf8towcsZ and the result replaces the stack slot.
+const wchar_t *lua_checklwcs_from_utf8(lua_State *L, int narg, size_t* l){
+	size_t n = 0;
+	int idx = lua_absindex(L, narg);
+	const char *utf8 = luaL_checklstring(L, idx, &n);
+	n = (size_t)lua_utf8towcsZ(L, utf8, (int)n);
+	if (l != NULL) *l = n;	/* count returned by lua_utf8towcsZ, stored only on request */
+	lua_replace(L, idx);
+	return (const wchar_t *)lua_tostring(L, idx);
+}
+// Optional variant: when argument `narg` is absent or nil, return `def` and, if `len` is
+// non-NULL, store wcslen(def) there (0 for a NULL def); otherwise use lua_checklwcs_from_utf8.
+const wchar_t *lua_optlwcs_from_utf8(lua_State *L, int narg, const wchar_t *def, size_t *len){
+	if (!lua_isnoneornil(L, narg))
+		return lua_checklwcs_from_utf8(L, narg, len);
+	if (len != NULL)
+		*len = (def != NULL) ? wcslen(def) : 0;
+	return def;
+}
+// Push a NUL-terminated wide string as UTF-8 (via lua_wcstoutf8); a NULL pointer pushes nil.
+void lua_pushutf8_from_wcs (lua_State *L, const wchar_t *s) {
+	if (s != NULL)
+		lua_wcstoutf8(L, s, wcslen(s));
+	else
+		lua_pushnil(L);
+}
+
+
+/*
+ * Widen each of the `len` bytes at `s` to one wchar_t (unsigned byte value) and
+ * push the units as a raw-byte Lua string.  sizeof(wchar_t)-1 zero bytes follow
+ * so that, with the '\0' Lua keeps after every string, the data ends in a full
+ * wide NUL on any wchar_t width.  Returns the unit count (terminator excluded).
+ */
+size_t lua_cstowcsZ(lua_State *L, const char *s, int len){
+	static const wchar_t wzero = 0;
+	int i = 0;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	for (; i < len; i++) {
+		wchar_t unit = (unsigned char)s[i];	/* unsigned: bytes >= 0x80 must not sign-extend */
+		luaL_addlstring(&b, (const char *)&unit, sizeof(wchar_t));
+	}
+	luaL_addlstring(&b, (const char *)&wzero, sizeof(wchar_t) - 1);
+	luaL_pushresult(&b);
+	return (size_t)i;
+}
+// Checked variant: argument `narg` must be a string (luaL_checklstring raises otherwise).
+// Each byte is widened by lua_cstowcsZ and the result replaces the stack slot.
+const wchar_t *lua_checklwcs_from_char(lua_State *L, int narg, size_t* l){
+	size_t n = 0;
+	int idx = lua_absindex(L, narg);
+	const char *bytes = luaL_checklstring(L, idx, &n);
+	n = (size_t)lua_cstowcsZ(L, bytes, (int)n);
+	if (l != NULL) *l = n;	/* count returned by lua_cstowcsZ, stored only on request */
+	lua_replace(L, idx);
+	return (const wchar_t *)lua_tostring(L, idx);
+}
+// Optional variant: when argument `narg` is absent or nil, return `def` and, if `len` is
+// non-NULL, store wcslen(def) there (0 for a NULL def); otherwise use lua_checklwcs_from_char.
+const wchar_t *lua_optlwcs_from_char (lua_State *L, int narg, const wchar_t *def, size_t *len){
+	if (!lua_isnoneornil(L, narg))
+		return lua_checklwcs_from_char(L, narg, len);
+	if (len != NULL)
+		*len = (def != NULL) ? wcslen(def) : 0;
+	return def;
+}
+/*
+ * Narrow the `cchSrc` wchar_t units at `s` to single bytes and push them as
+ * one Lua string.  A unit comparing <= 0xFF is converted to char; every other
+ * unit is replaced by '?'.  One byte is written per unit, and the number of
+ * bytes written is returned.
+ */
+size_t lua_wcstochar(lua_State *L, const wchar_t *s, size_t cchSrc){
+	size_t i;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+	for (i = 0; i < cchSrc; i++) {
+		const wchar_t unit = s[i];
+		if (unit > 0x00ff) {
+			luaL_addchar(&b, '?');	/* placeholder for a unit that does not fit in one byte */
+		} else {
+			luaL_addchar(&b, (char)unit);
+		}
+	}
+	luaL_pushresult(&b);
+	return i;
+}
+
+
+
+
+
+
+
+/*
+size_t lua_utf8towcsZ2(lua_State *L, const char *s, int len){
+    size_t cchWC = 0;                 // # of wchar_t code points generated
+    const unsigned char * pUTF8 = (const unsigned char *)s;
+    const unsigned char * pEnd = (const unsigned char *)(s + len);
+    wchar_t * pBuf = NULL;
+	const wchar_t * pBuf0 = NULL;
+	const wchar_t * pBuf1 = NULL;
+	luaL_Buffer b;
+	luaL_buffinit(L, &b);
+    while (pUTF8 < pEnd){
+		pBuf = (wchar_t *)luaL_prepbuffer(&b);
+		pBuf0 = pBuf;
+		pBuf1 = pBuf + (LUAL_BUFFERSIZE-sizeof(wchar_t));
+		while(pUTF8 < pEnd && pBuf <= pBuf1){
+			//  See if there are any trail bytes.
+			if (*pUTF8 < 0xC0) {// 192
+				//  Found ASCII.
+				*pBuf++ = pUTF8[0];
+			}else if (pUTF8[0] < 0xE0){ //224
+				if ((pUTF8[1] & 0xC0) == 0x80) {
+					// A two-byte-character lead-byte not followed by trail-byte represents itself.
+					*pBuf++ = (wchar_t) (((pUTF8[0] & 0x1F) << 6) | (pUTF8[1] & 0x3F));
+					pUTF8 += 1;
+				}else{
+					// A two-byte-character lead-byte not followed by trail-byte represents itself.
+					*pBuf++ = pUTF8[0];
+				}
+			}else if (pUTF8[0] < 0xF0) {//240
+				if (((pUTF8[1] & 0xC0) == 0x80) && ((pUTF8[2] & 0xC0) == 0x80)) {
+					// Three-byte-character lead byte followed by two trail bytes.
+					*pBuf++ = (wchar_t) (((pUTF8[0] & 0x0F) << 12) | ((pUTF8[1] & 0x3F) << 6) | (pUTF8[2] & 0x3F));
+					pUTF8 += 2;
+				}else{
+					// Three-byte-character lead byte followed by two trail bytes.
+					*pBuf++ = pUTF8[0];
+				}
+			}else{
+				*pBuf++ = pUTF8[0];
+			}
+			cchWC++;
+			pUTF8++;
+		}
+		luaL_addsize(&b, (size_t)(pBuf0 - pBuf));
+    }
+	luaL_addchar(&b, '\0');// the other '\0' will be added by lua
+	luaL_pushresult(&b);
+    //  Return the number of wchar_t characters written.
+    return (cchWC);
+}//*/
+\ No newline at end of file