Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kornelski/7z.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/Common/StringConvert.cpp')
-rw-r--r-- CPP/Common/StringConvert.cpp 350
1 file changed, 249 insertions, 101 deletions
diff --git a/CPP/Common/StringConvert.cpp b/CPP/Common/StringConvert.cpp
index 0443a06c..b55ac171 100644
--- a/CPP/Common/StringConvert.cpp
+++ b/CPP/Common/StringConvert.cpp
@@ -8,158 +8,306 @@
#include <stdlib.h>
#endif
+static const char k_DefultChar = '_';
+
#ifdef _WIN32
-UString MultiByteToUnicodeString(const AString &srcString, UINT codePage)
-{
- UString resultString;
- if (!srcString.IsEmpty())
- {
- int numChars = MultiByteToWideChar(codePage, 0, srcString,
- srcString.Len(), resultString.GetBuffer(srcString.Len()),
- srcString.Len() + 1);
- if (numChars == 0)
- throw 282228;
- resultString.ReleaseBuffer(numChars);
- }
- return resultString;
-}
-void MultiByteToUnicodeString2(UString &dest, const AString &srcString, UINT codePage)
+/*
+MultiByteToWideChar(CodePage, DWORD dwFlags,
+ LPCSTR lpMultiByteStr, int cbMultiByte,
+ LPWSTR lpWideCharStr, int cchWideChar)
+
+ if (cbMultiByte == 0)
+ return: 0. ERR: ERROR_INVALID_PARAMETER
+
+ if (cchWideChar == 0)
+ return: the required buffer size in characters.
+
+ if (supplied buffer size was not large enough)
+ return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
+ The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
+
+ If there are illegal characters:
+ if MB_ERR_INVALID_CHARS is set in dwFlags:
+ - the function stops conversion on illegal character.
+ - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
+
+ if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
+ before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
+ in Vista+: illegal character is not dropped (MSDN). Undocumented: illegal
+ character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
+*/
+
+
+void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
{
dest.Empty();
- if (!srcString.IsEmpty())
+ if (src.IsEmpty())
+ return;
{
- wchar_t *destBuf = dest.GetBuffer(srcString.Len());
- const char *sp = (const char *)srcString;
+ /*
+ wchar_t *d = dest.GetBuf(src.Len());
+ const char *s = (const char *)src;
unsigned i;
+
for (i = 0;;)
{
- char c = sp[i];
- if ((Byte)c >= 0x80 || c == 0)
+ Byte c = (Byte)s[i];
+ if (c >= 0x80 || c == 0)
break;
- destBuf[i++] = (wchar_t)c;
+ d[i++] = (wchar_t)c;
}
- if (i != srcString.Len())
+ if (i != src.Len())
{
- unsigned numChars = MultiByteToWideChar(codePage, 0, sp + i,
- srcString.Len() - i, destBuf + i,
- srcString.Len() + 1 - i);
- if (numChars == 0)
+ unsigned len = MultiByteToWideChar(codePage, 0, s + i,
+ src.Len() - i, d + i,
+ src.Len() + 1 - i);
+ if (len == 0)
throw 282228;
- i += numChars;
+ i += len;
+ }
+
+ d[i] = 0;
+ dest.ReleaseBuf_SetLen(i);
+ */
+ unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
+ if (len == 0)
+ {
+ if (GetLastError() != 0)
+ throw 282228;
+ }
+ else
+ {
+ len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
+ if (len == 0)
+ throw 282228;
+ dest.ReleaseBuf_SetEnd(len);
}
- dest.ReleaseBuffer(i);
}
}
-void UnicodeStringToMultiByte2(AString &dest, const UString &s, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
+/*
+ int WideCharToMultiByte(
+ UINT CodePage, DWORD dwFlags,
+ LPCWSTR lpWideCharStr, int cchWideChar,
+ LPSTR lpMultiByteStr, int cbMultiByte,
+ LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
+
+if (lpDefaultChar == NULL),
+ - it uses system default value.
+
+if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
+ if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
+ return: 0. ERR: ERROR_INVALID_PARAMETER.
+
+The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
+
+*/
+
+static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
{
dest.Empty();
defaultCharWasUsed = false;
- if (!s.IsEmpty())
+ if (src.IsEmpty())
+ return;
{
- unsigned numRequiredBytes = s.Len() * 2;
- char *destBuf = dest.GetBuffer(numRequiredBytes);
+ /*
+ unsigned numRequiredBytes = src.Len() * 2;
+ char *d = dest.GetBuf(numRequiredBytes);
+ const wchar_t *s = (const wchar_t *)src;
unsigned i;
- const wchar_t *sp = (const wchar_t *)s;
+
for (i = 0;;)
{
- wchar_t c = sp[i];
+ wchar_t c = s[i];
if (c >= 0x80 || c == 0)
break;
- destBuf[i++] = (char)c;
+ d[i++] = (char)c;
}
- defaultCharWasUsed = false;
- if (i != s.Len())
+
+ if (i != src.Len())
{
- BOOL defUsed;
- unsigned numChars = WideCharToMultiByte(codePage, 0, sp + i, s.Len() - i,
- destBuf + i, numRequiredBytes + 1 - i,
- &defaultChar, &defUsed);
+ BOOL defUsed = FALSE;
+ defaultChar = defaultChar;
+
+ bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
+ unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
+ d + i, numRequiredBytes + 1 - i,
+ (isUtf ? NULL : &defaultChar),
+ (isUtf ? NULL : &defUsed));
defaultCharWasUsed = (defUsed != FALSE);
- if (numChars == 0)
+ if (len == 0)
throw 282229;
- i += numChars;
+ i += len;
}
- dest.ReleaseBuffer(i);
- }
-}
-void UnicodeStringToMultiByte2(AString &dest, const UString &srcString, UINT codePage)
-{
- bool defaultCharWasUsed;
- UnicodeStringToMultiByte2(dest, srcString, codePage, '_', defaultCharWasUsed);
-}
+ d[i] = 0;
+ dest.ReleaseBuf_SetLen(i);
+ */
-AString UnicodeStringToMultiByte(const UString &s, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
-{
- AString dest;
- defaultCharWasUsed = false;
- if (!s.IsEmpty())
- {
- unsigned numRequiredBytes = s.Len() * 2;
- BOOL defUsed;
- int numChars = WideCharToMultiByte(codePage, 0, s, s.Len(),
- dest.GetBuffer(numRequiredBytes), numRequiredBytes + 1,
- &defaultChar, &defUsed);
- defaultCharWasUsed = (defUsed != FALSE);
- if (numChars == 0)
- throw 282229;
- dest.ReleaseBuffer(numChars);
- }
- return dest;
-}
+ /*
+ if (codePage != CP_UTF7)
+ {
+ const wchar_t *s = (const wchar_t *)src;
+ unsigned i;
+ for (i = 0;; i++)
+ {
+ wchar_t c = s[i];
+ if (c >= 0x80 || c == 0)
+ break;
+ }
+
+ if (s[i] == 0)
+ {
+ char *d = dest.GetBuf(src.Len());
+ for (i = 0;;)
+ {
+ wchar_t c = s[i];
+ if (c == 0)
+ break;
+ d[i++] = (char)c;
+ }
+ d[i] = 0;
+ dest.ReleaseBuf_SetLen(i);
+ return;
+ }
+ }
+ */
-AString UnicodeStringToMultiByte(const UString &srcString, UINT codePage)
-{
- bool defaultCharWasUsed;
- return UnicodeStringToMultiByte(srcString, codePage, '_', defaultCharWasUsed);
+ unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
+ if (len == 0)
+ {
+ if (GetLastError() != 0)
+ throw 282228;
+ }
+ else
+ {
+ BOOL defUsed = FALSE;
+ bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
+ defaultChar = defaultChar;
+ len = WideCharToMultiByte(codePage, 0, src, src.Len(),
+ dest.GetBuf(len), len,
+ (isUtf ? NULL : &defaultChar),
+ (isUtf ? NULL : &defUsed)
+ );
+ if (!isUtf)
+ defaultCharWasUsed = (defUsed != FALSE);
+ if (len == 0)
+ throw 282228;
+ dest.ReleaseBuf_SetEnd(len);
+ }
+ }
}
+/*
#ifndef UNDER_CE
-AString SystemStringToOemString(const CSysString &srcString)
+AString SystemStringToOemString(const CSysString &src)
{
- AString result;
- CharToOem(srcString, result.GetBuffer(srcString.Len() * 2));
- result.ReleaseBuffer();
- return result;
+ AString dest;
+ const unsigned len = src.Len() * 2;
+ CharToOem(src, dest.GetBuf(len));
+ dest.ReleaseBuf_CalcLen(len);
+ return dest;
}
#endif
+*/
#else
-UString MultiByteToUnicodeString(const AString &srcString, UINT codePage)
+void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
{
- UString resultString;
- for (unsigned i = 0; i < srcString.Len(); i++)
- resultString += (wchar_t)srcString[i];
- /*
- if (!srcString.IsEmpty())
+ dest.Empty();
+ if (src.IsEmpty())
+ return;
+
+ size_t limit = ((size_t)src.Len() + 1) * 2;
+ wchar_t *d = dest.GetBuf((unsigned)limit);
+ size_t len = mbstowcs(d, src, limit);
+ if (len != (size_t)-1)
{
- int numChars = mbstowcs(resultString.GetBuffer(srcString.Len()), srcString, srcString.Len() + 1);
- if (numChars < 0) throw "Your environment does not support UNICODE";
- resultString.ReleaseBuffer(numChars);
+ dest.ReleaseBuf_SetEnd((unsigned)len);
+ return;
+ }
+
+ {
+ unsigned i;
+ const char *s = (const char *)src;
+ for (i = 0;;)
+ {
+ Byte c = (Byte)s[i];
+ if (c == 0)
+ break;
+ d[i++] = (wchar_t)c;
+ }
+ d[i] = 0;
+ dest.ReleaseBuf_SetLen(i);
}
- */
- return resultString;
}
-AString UnicodeStringToMultiByte(const UString &srcString, UINT codePage)
+static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
{
- AString resultString;
- for (unsigned i = 0; i < srcString.Len(); i++)
- resultString += (char)srcString[i];
- /*
- if (!srcString.IsEmpty())
+ dest.Empty();
+ defaultCharWasUsed = false;
+ if (src.IsEmpty())
+ return;
+
+ size_t limit = ((size_t)src.Len() + 1) * 6;
+ char *d = dest.GetBuf((unsigned)limit);
+ size_t len = wcstombs(d, src, limit);
+ if (len != (size_t)-1)
{
- int numRequiredBytes = srcString.Len() * 6 + 1;
- int numChars = wcstombs(resultString.GetBuffer(numRequiredBytes), srcString, numRequiredBytes);
- if (numChars < 0) throw "Your environment does not support UNICODE";
- resultString.ReleaseBuffer(numChars);
+ dest.ReleaseBuf_SetEnd((unsigned)len);
+ return;
+ }
+
+ {
+ const wchar_t *s = (const wchar_t *)src;
+ unsigned i;
+ for (i = 0;;)
+ {
+ wchar_t c = s[i];
+ if (c == 0)
+ break;
+ if (c >= 0x100)
+ {
+ c = defaultChar;
+ defaultCharWasUsed = true;
+ }
+ d[i++] = (char)c;
+ }
+ d[i] = 0;
+ dest.ReleaseBuf_SetLen(i);
}
- */
- return resultString;
}
#endif
+
+
+UString MultiByteToUnicodeString(const AString &src, UINT codePage)
+{
+ UString dest;
+ MultiByteToUnicodeString2(dest, src, codePage);
+ return dest;
+}
+
+void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
+{
+ bool defaultCharWasUsed;
+ UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
+}
+
+AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
+{
+ AString dest;
+ UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
+ return dest;
+}
+
+AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
+{
+ AString dest;
+ bool defaultCharWasUsed;
+ UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
+ return dest;
+}