Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kornelski/7z.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/Common/UTFConvert.cpp')
-rwxr-xr-x CPP/Common/UTFConvert.cpp 130
1 files changed, 92 insertions, 38 deletions
diff --git a/CPP/Common/UTFConvert.cpp b/CPP/Common/UTFConvert.cpp
index e15695bb..9d1fd005 100755
--- a/CPP/Common/UTFConvert.cpp
+++ b/CPP/Common/UTFConvert.cpp
@@ -5,87 +5,141 @@
#include "UTFConvert.h"
#include "Types.h"
-static Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-// These functions are for UTF8 <-> UTF16 conversion.
-
-bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
+static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
{
- dest.Empty();
- for(int i = 0; i < src.Length();)
+ size_t destPos = 0, srcPos = 0;
+ for (;;)
{
- Byte c = (Byte)src[i++];
+ Byte c;
+ int numAdds;
+ if (srcPos == srcLen)
+ {
+ *destLen = destPos;
+ return True;
+ }
+ c = (Byte)src[srcPos++];
+
if (c < 0x80)
{
- dest += (wchar_t)c;
+ if (dest)
+ dest[destPos] = (wchar_t)c;
+ destPos++;
continue;
}
- if(c < 0xC0)
- return false;
- int numAdds;
+ if (c < 0xC0)
+ break;
for (numAdds = 1; numAdds < 5; numAdds++)
if (c < kUtf8Limits[numAdds])
break;
UInt32 value = (c - kUtf8Limits[numAdds - 1]);
+
do
{
- if (i >= src.Length())
- return false;
- Byte c2 = (Byte)src[i++];
+ Byte c2;
+ if (srcPos == srcLen)
+ break;
+ c2 = (Byte)src[srcPos++];
if (c2 < 0x80 || c2 >= 0xC0)
- return false;
+ break;
value <<= 6;
value |= (c2 - 0x80);
- numAdds--;
}
- while(numAdds > 0);
+ while (--numAdds != 0);
+
if (value < 0x10000)
- dest += (wchar_t)(value);
+ {
+ if (dest)
+ dest[destPos] = (wchar_t)value;
+ destPos++;
+ }
else
{
value -= 0x10000;
if (value >= 0x100000)
- return false;
- dest += (wchar_t)(0xD800 + (value >> 10));
- dest += (wchar_t)(0xDC00 + (value & 0x3FF));
+ break;
+ if (dest)
+ {
+ dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
+ dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
+ }
+ destPos += 2;
}
}
- return true;
+ *destLen = destPos;
+ return False;
}
-bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
+static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
{
- dest.Empty();
- for(int i = 0; i < src.Length();)
+ size_t destPos = 0, srcPos = 0;
+ for (;;)
{
- UInt32 value = (UInt32)src[i++];
+ unsigned numAdds;
+ UInt32 value;
+ if (srcPos == srcLen)
+ {
+ *destLen = destPos;
+ return True;
+ }
+ value = src[srcPos++];
if (value < 0x80)
{
- dest += (char)value;
+ if (dest)
+ dest[destPos] = (char)value;
+ destPos++;
continue;
}
if (value >= 0xD800 && value < 0xE000)
{
- if (value >= 0xDC00)
- return false;
- if (i >= src.Length())
- return false;
- UInt32 c2 = (UInt32)src[i++];
+ UInt32 c2;
+ if (value >= 0xDC00 || srcPos == srcLen)
+ break;
+ c2 = src[srcPos++];
if (c2 < 0xDC00 || c2 >= 0xE000)
- return false;
+ break;
value = ((value - 0xD800) << 10) | (c2 - 0xDC00);
}
- int numAdds;
for (numAdds = 1; numAdds < 5; numAdds++)
if (value < (((UInt32)1) << (numAdds * 5 + 6)))
break;
- dest += (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
+ if (dest)
+ dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
+ destPos++;
do
{
numAdds--;
- dest += (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
+ if (dest)
+ dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
+ destPos++;
}
- while(numAdds > 0);
+ while (numAdds != 0);
}
- return true;
+ *destLen = destPos;
+ return False;
+}
+
+bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
+{
+ dest.Empty();
+ size_t destLen = 0;
+ Utf8_To_Utf16(NULL, &destLen, src, src.Length());
+ wchar_t *p = dest.GetBuffer((int)destLen);
+ Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
+ p[destLen] = 0;
+ dest.ReleaseBuffer();
+ return res ? true : false;
+}
+
+bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
+{
+ dest.Empty();
+ size_t destLen = 0;
+ Utf16_To_Utf8(NULL, &destLen, src, src.Length());
+ char *p = dest.GetBuffer((int)destLen);
+ Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
+ p[destLen] = 0;
+ dest.ReleaseBuffer();
+ return res ? true : false;
}