diff options
Diffstat (limited to 'CPP/Common/StringConvert.cpp')
-rw-r--r-- | CPP/Common/StringConvert.cpp | 350 |
1 files changed, 249 insertions, 101 deletions
diff --git a/CPP/Common/StringConvert.cpp b/CPP/Common/StringConvert.cpp index 0443a06c..b55ac171 100644 --- a/CPP/Common/StringConvert.cpp +++ b/CPP/Common/StringConvert.cpp @@ -8,158 +8,306 @@ #include <stdlib.h> #endif +static const char k_DefultChar = '_'; + #ifdef _WIN32 -UString MultiByteToUnicodeString(const AString &srcString, UINT codePage) -{ - UString resultString; - if (!srcString.IsEmpty()) - { - int numChars = MultiByteToWideChar(codePage, 0, srcString, - srcString.Len(), resultString.GetBuffer(srcString.Len()), - srcString.Len() + 1); - if (numChars == 0) - throw 282228; - resultString.ReleaseBuffer(numChars); - } - return resultString; -} -void MultiByteToUnicodeString2(UString &dest, const AString &srcString, UINT codePage) +/* +MultiByteToWideChar(CodePage, DWORD dwFlags, + LPCSTR lpMultiByteStr, int cbMultiByte, + LPWSTR lpWideCharStr, int cchWideChar) + + if (cbMultiByte == 0) + return: 0. ERR: ERROR_INVALID_PARAMETER + + if (cchWideChar == 0) + return: the required buffer size in characters. + + if (supplied buffer size was not large enough) + return: 0. ERR: ERROR_INSUFFICIENT_BUFFER + The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex) + + If there are illegal characters: + if MB_ERR_INVALID_CHARS is set in dwFlags: + - the function stops conversion on illegal character. + - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION. + + if MB_ERR_INVALID_CHARS is NOT set in dwFlags: + before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0. + in Vista+: illegal character is not dropped (MSDN). Undocumented: illegal + character is converted to U+FFFD, which is REPLACEMENT CHARACTER. +*/ + + +void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) { dest.Empty(); - if (!srcString.IsEmpty()) + if (src.IsEmpty()) + return; { - wchar_t *destBuf = dest.GetBuffer(srcString.Len()); - const char *sp = (const char *)srcString; + /* + wchar_t *d = dest.GetBuf(src.Len()); + const char *s = (const char *)src; unsigned i; + for (i = 0;;) { - char c = sp[i]; - if ((Byte)c >= 0x80 || c == 0) + Byte c = (Byte)s[i]; + if (c >= 0x80 || c == 0) break; - destBuf[i++] = (wchar_t)c; + d[i++] = (wchar_t)c; } - if (i != srcString.Len()) + if (i != src.Len()) { - unsigned numChars = MultiByteToWideChar(codePage, 0, sp + i, - srcString.Len() - i, destBuf + i, - srcString.Len() + 1 - i); - if (numChars == 0) + unsigned len = MultiByteToWideChar(codePage, 0, s + i, + src.Len() - i, d + i, + src.Len() + 1 - i); + if (len == 0) throw 282228; - i += numChars; + i += len; + } + + d[i] = 0; + dest.ReleaseBuf_SetLen(i); + */ + unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0); + if (len == 0) + { + if (GetLastError() != 0) + throw 282228; + } + else + { + len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len); + if (len == 0) + throw 282228; + dest.ReleaseBuf_SetEnd(len); } - dest.ReleaseBuffer(i); } } -void UnicodeStringToMultiByte2(AString &dest, const UString &s, UINT codePage, char defaultChar, bool &defaultCharWasUsed) +/* + int WideCharToMultiByte( + UINT CodePage, DWORD dwFlags, + LPCWSTR lpWideCharStr, int cchWideChar, + LPSTR lpMultiByteStr, int cbMultiByte, + LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar); + +if (lpDefaultChar == NULL), + - it uses system default value. + +if (CodePage == CP_UTF7 || CodePage == CP_UTF8) + if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL) + return: 0. ERR: ERROR_INVALID_PARAMETER. + +The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL) + +*/ + +static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed) { dest.Empty(); defaultCharWasUsed = false; - if (!s.IsEmpty()) + if (src.IsEmpty()) + return; { - unsigned numRequiredBytes = s.Len() * 2; - char *destBuf = dest.GetBuffer(numRequiredBytes); + /* + unsigned numRequiredBytes = src.Len() * 2; + char *d = dest.GetBuf(numRequiredBytes); + const wchar_t *s = (const wchar_t *)src; unsigned i; - const wchar_t *sp = (const wchar_t *)s; + for (i = 0;;) { - wchar_t c = sp[i]; + wchar_t c = s[i]; if (c >= 0x80 || c == 0) break; - destBuf[i++] = (char)c; + d[i++] = (char)c; } - defaultCharWasUsed = false; - if (i != s.Len()) + + if (i != src.Len()) { - BOOL defUsed; - unsigned numChars = WideCharToMultiByte(codePage, 0, sp + i, s.Len() - i, - destBuf + i, numRequiredBytes + 1 - i, - &defaultChar, &defUsed); + BOOL defUsed = FALSE; + defaultChar = defaultChar; + + bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7); + unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i, + d + i, numRequiredBytes + 1 - i, + (isUtf ? NULL : &defaultChar), + (isUtf ? NULL : &defUsed)); defaultCharWasUsed = (defUsed != FALSE); - if (numChars == 0) + if (len == 0) throw 282229; - i += numChars; + i += len; } - dest.ReleaseBuffer(i); - } -} -void UnicodeStringToMultiByte2(AString &dest, const UString &srcString, UINT codePage) -{ - bool defaultCharWasUsed; - UnicodeStringToMultiByte2(dest, srcString, codePage, '_', defaultCharWasUsed); -} + d[i] = 0; + dest.ReleaseBuf_SetLen(i); + */ -AString UnicodeStringToMultiByte(const UString &s, UINT codePage, char defaultChar, bool &defaultCharWasUsed) -{ - AString dest; - defaultCharWasUsed = false; - if (!s.IsEmpty()) - { - unsigned numRequiredBytes = s.Len() * 2; - BOOL defUsed; - int numChars = WideCharToMultiByte(codePage, 0, s, s.Len(), - dest.GetBuffer(numRequiredBytes), numRequiredBytes + 1, - &defaultChar, &defUsed); - defaultCharWasUsed = (defUsed != FALSE); - if (numChars == 0) - throw 282229; - dest.ReleaseBuffer(numChars); - } - return dest; -} + /* + if (codePage != CP_UTF7) + { + const wchar_t *s = (const wchar_t *)src; + unsigned i; + for (i = 0;; i++) + { + wchar_t c = s[i]; + if (c >= 0x80 || c == 0) + break; + } + + if (s[i] == 0) + { + char *d = dest.GetBuf(src.Len()); + for (i = 0;;) + { + wchar_t c = s[i]; + if (c == 0) + break; + d[i++] = (char)c; + } + d[i] = 0; + dest.ReleaseBuf_SetLen(i); + return; + } + } + */ -AString UnicodeStringToMultiByte(const UString &srcString, UINT codePage) -{ - bool defaultCharWasUsed; - return UnicodeStringToMultiByte(srcString, codePage, '_', defaultCharWasUsed); + unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL); + if (len == 0) + { + if (GetLastError() != 0) + throw 282228; + } + else + { + BOOL defUsed = FALSE; + bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7); + defaultChar = defaultChar; + len = WideCharToMultiByte(codePage, 0, src, src.Len(), + dest.GetBuf(len), len, + (isUtf ? NULL : &defaultChar), + (isUtf ? NULL : &defUsed) + ); + if (!isUtf) + defaultCharWasUsed = (defUsed != FALSE); + if (len == 0) + throw 282228; + dest.ReleaseBuf_SetEnd(len); + } + } } +/* #ifndef UNDER_CE -AString SystemStringToOemString(const CSysString &srcString) +AString SystemStringToOemString(const CSysString &src) { - AString result; - CharToOem(srcString, result.GetBuffer(srcString.Len() * 2)); - result.ReleaseBuffer(); - return result; + AString dest; + const unsigned len = src.Len() * 2; + CharToOem(src, dest.GetBuf(len)); + dest.ReleaseBuf_CalcLen(len); + return dest; } #endif +*/ #else -UString MultiByteToUnicodeString(const AString &srcString, UINT codePage) +void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */) { - UString resultString; - for (unsigned i = 0; i < srcString.Len(); i++) - resultString += (wchar_t)srcString[i]; - /* - if (!srcString.IsEmpty()) + dest.Empty(); + if (src.IsEmpty()) + return; + + size_t limit = ((size_t)src.Len() + 1) * 2; + wchar_t *d = dest.GetBuf((unsigned)limit); + size_t len = mbstowcs(d, src, limit); + if (len != (size_t)-1) { - int numChars = mbstowcs(resultString.GetBuffer(srcString.Len()), srcString, srcString.Len() + 1); - if (numChars < 0) throw "Your environment does not support UNICODE"; - resultString.ReleaseBuffer(numChars); + dest.ReleaseBuf_SetEnd((unsigned)len); + return; + } + + { + unsigned i; + const char *s = (const char *)src; + for (i = 0;;) + { + Byte c = (Byte)s[i]; + if (c == 0) + break; + d[i++] = (wchar_t)c; + } + d[i] = 0; + dest.ReleaseBuf_SetLen(i); } - */ - return resultString; } -AString UnicodeStringToMultiByte(const UString &srcString, UINT codePage) +static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed) { - AString resultString; - for (unsigned i = 0; i < srcString.Len(); i++) - resultString += (char)srcString[i]; - /* - if (!srcString.IsEmpty()) + dest.Empty(); + defaultCharWasUsed = false; + if (src.IsEmpty()) + return; + + size_t limit = ((size_t)src.Len() + 1) * 6; + char *d = dest.GetBuf((unsigned)limit); + size_t len = wcstombs(d, src, limit); + if (len != (size_t)-1) { - int numRequiredBytes = srcString.Len() * 6 + 1; - int numChars = wcstombs(resultString.GetBuffer(numRequiredBytes), srcString, numRequiredBytes); - if (numChars < 0) throw "Your environment does not support UNICODE"; - resultString.ReleaseBuffer(numChars); + dest.ReleaseBuf_SetEnd((unsigned)len); + return; + } + + { + const wchar_t *s = (const wchar_t *)src; + unsigned i; + for (i = 0;;) + { + wchar_t c = s[i]; + if (c == 0) + break; + if (c >= 0x100) + { + c = defaultChar; + defaultCharWasUsed = true; + } + d[i++] = (char)c; + } + d[i] = 0; + dest.ReleaseBuf_SetLen(i); } - */ - return resultString; } #endif + + +UString MultiByteToUnicodeString(const AString &src, UINT codePage) +{ + UString dest; + MultiByteToUnicodeString2(dest, src, codePage); + return dest; +} + +void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage) +{ + bool defaultCharWasUsed; + UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed); +} + +AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed) +{ + AString dest; + UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed); + return dest; +} + +AString UnicodeStringToMultiByte(const UString &src, UINT codePage) +{ + AString dest; + bool defaultCharWasUsed; + UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed); + return dest; +} |